changeset 5:8952990fcab9

Update to snpEff version 3.4 and add data managers to download snpEff genome reference databases
author Jim Johnson <jj@umn.edu>
date Wed, 27 Nov 2013 09:11:32 -0600
parents 47ddc9f4d0b6
children eb394dd65c98
files README data_manager/data_manager_snpEff_databases.xml data_manager/data_manager_snpEff_download.py data_manager/data_manager_snpEff_download.xml datatypes_conf.xml lib/galaxy/datatypes/snpeff.py snpEff.xml snpEff_download.xml snpSift_annotate.xml snpSift_caseControl.xml snpSift_filter.xml snpSift_int.xml tool-data/snpeff_genomedb.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 15 files changed, 597 insertions(+), 408 deletions(-) [+]
line wrap: on
line diff
--- a/README	Mon Oct 07 10:59:44 2013 -0500
+++ b/README	Wed Nov 27 09:11:32 2013 -0600
@@ -1,4 +1,6 @@
-These are galaxy tools for SnpEff ( http://snpeff.sourceforge.net/ )
+These are Galaxy tools for SnpEff, a variant annotation and effect prediction tool by Pablo Cingolani.
+It annotates and predicts the effects of variants on genes (such as amino acid changes).
+( http://snpeff.sourceforge.net/ )
 
 This repository contains a tool_dependencies.xml file that will attempt to automatically install SnpEff and SnpSift.   
 
@@ -15,4 +17,8 @@
   data_manager_snpeff_databases - generates a list of available SnpEff genome databases into the tool-data/snpeff_databases.loc 
   data_manager_snpeff_download - downloads a SnpEff genome database selected from: tool-data/snpeff_databases.loc and adds entries to snpeff_genomedb.loc,snpeff_regulationdb.loc,snpeff_annotations.loc 
 
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
 
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et al., Frontiers in Genetics, 3, 2012.
--- a/data_manager/data_manager_snpEff_databases.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/data_manager/data_manager_snpEff_databases.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,7 +1,7 @@
-<tool id="data_manager_snpeff_databases" name="SnpEff Databases" version="3.3" tool_type="manage_data">
+<tool id="data_manager_snpeff_databases" name="SnpEff Databases" version="3.4" tool_type="manage_data">
 	<description>Read the list of available snpEff databases</description>
 	<requirements>
-		<requirement type="package" version="3.3">snpEff</requirement>
+		<requirement type="package" version="3.4">snpEff</requirement>
 	</requirements>
 	<command interpreter="python">
         data_manager_snpEff_databases.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar "$out_file"
@@ -33,6 +33,9 @@
 
 For information about snpEff:    http://snpEff.sourceforge.net
 
+Please cite:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
 	</help>
 </tool>
 
--- a/data_manager/data_manager_snpEff_download.py	Mon Oct 07 10:59:44 2013 -0500
+++ b/data_manager/data_manager_snpEff_download.py	Wed Nov 27 09:11:32 2013 -0600
@@ -39,24 +39,15 @@
     ## Note: Since version 2.1 you can use tilde ('~') as first character to refer to your home directory
     ##---
     #data_dir = ~/snpEff/data/
-    data_dir = None
-    try:
-        fh = open(config)
-        for i,line in enumerate(fh):
-            if line.strip().startswith('data_dir'):
-                (k,v) = line.split('=')
-                data_dir = os.path.expanduser(v.strip())
-                break
-    except Exception, e:
-        stop_err( 'Error parsing %s %s\n' % (config,str( e )) )
-    else:
-        fh.close()
+    data_dir = target_directory
     (snpEff_dir,snpEff_jar) = os.path.split(jar_path)
     args = [ 'java','-jar' ]
     args.append( jar_path )
     args.append( 'download' )
-    # args.append( '-c' )
-    # args.append( 'config' )
+    args.append( '-c' )
+    args.append( config )
+    args.append( '-dataDir' )
+    args.append( data_dir )
     args.append( '-v' )
     args.append( genome_version )
     proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir )
@@ -74,7 +65,7 @@
                 if fname.startswith('snpEffectPredictor'):
                     # if snpEffectPredictor.bin download succeeded
                     name = genome_version + (' : ' + organism if organism else '') 
-                    data_table_entry = dict(value=genome_version, name=name)
+                    data_table_entry = dict(value=genome_version, name=name, path=data_dir)
                     _add_data_table_entry( data_manager_dict, 'snpeff_genomedb', data_table_entry )
                 else:
                     m = re.match(regulation_pattern,fname)
--- a/data_manager/data_manager_snpEff_download.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/data_manager/data_manager_snpEff_download.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,7 +1,7 @@
-<tool id="data_manager_snpeff_download" name="SnpEff Download" version="3.3" tool_type="manage_data">
+<tool id="data_manager_snpeff_download" name="SnpEff Download" version="3.4" tool_type="manage_data">
     <description>Download a new database</description>
     <requirements>
-        <requirement type="package" version="3.3">snpEff</requirement>
+        <requirement type="package" version="3.4">snpEff</requirement>
     </requirements>
     <command interpreter="python">
         data_manager_snpEff_download.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar --config \$SNPEFF_JAR_PATH/snpEff.config 
@@ -11,7 +11,9 @@
         </command>
     <inputs>
         <param name="genome_databases" type="select" label="Genome Version">
-            <options from_data_table="snpeff_databases"/>
+            <options from_data_table="snpeff_databases">
+                <filter type="sort_by" column="0" />
+            </options>
         </param>
     </inputs>
 
@@ -41,6 +43,9 @@
 
 For details about this tool, please go to http://snpEff.sourceforge.net
 
+Please cite:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
     </help>
 </tool>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="snpeff.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="snpeffdb" type="galaxy.datatypes.snpeff:SnpEffDb" display_in_upload="True"/>
+    </registration>
+</datatypes>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/snpeff.py	Wed Nov 27 09:11:32 2013 -0600
@@ -0,0 +1,46 @@
+"""
+SnpEff datatypes
+"""
+import os,os.path,re,sys
+import galaxy.datatypes.data
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.metadata import MetadataElement
+
+class SnpEffDb( Text ):
+    """Class describing a SnpEff genome database whose data files are stored in the dataset's extra_files_path"""
+    file_ext = "snpeffdb"
+    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
+    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[] )
+    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[] )
+
+    def __init__( self, **kwd ):
+        Text.__init__( self, **kwd )
+
+    def set_meta( self, dataset, **kwd ):
+        Text.set_meta(self, dataset, **kwd )
+        data_dir = dataset.extra_files_path
+        ## search data_dir/genome_version for files
+        regulation_pattern = 'regulation_(.+).bin'
+        #  annotation files that are included in snpEff by a flag
+        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
+        regulations = []
+        annotations = []
+        if data_dir and os.path.isdir(data_dir):
+            for root, dirs, files in os.walk(data_dir):
+                for fname in files:
+                    if fname.startswith('snpEffectPredictor'):
+                        # if snpEffectPredictor.bin download succeeded
+                        genome_version = os.path.basename(root)
+                        dataset.metadata.genome_version = genome_version
+                    else:
+                        m = re.match(regulation_pattern,fname)
+                        if m:
+                            name = m.groups()[0]
+                            regulations.append(name)
+                        elif fname in annotations_dict:
+                            value = annotations_dict[fname]
+                            name = value.lstrip('-')
+                            annotations.append(name)
+            dataset.metadata.regulation = regulations
+            dataset.metadata.annotation = annotations
+
--- a/snpEff.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/snpEff.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,74 +1,12 @@
-<tool id="snpEff" name="SnpEff" version="3.3">
-	<description>Variant effect and annotation</description>
-	<!-- 
-	    You will need to change the path to wherever your installation is.
-		You can change the amount of memory used by snpEff, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
-	<command>java -Xmx6G -jar /path/to/your/snpEff/snpEff.jar eff -c /path/to/your/snpEff/snpEff/snpEff.config $inputFormat $offset -upDownStreamLen $udLength $filterIn $filterHomHet -no $filterOut -stats $statsFile $genomeVersion $input > $output </command>
-Options:
-        -a , -around            : Show N codons and amino acids around change (only in coding regions). Default is 0 codons.
-        -i <format>             : Input format [ vcf, txt, pileup, bed ]. Default: VCF.
-        -o <format>             : Ouput format [ txt, vcf, gatk, bed, bedAnn ]. Default: VCF.
-        -interval               : Use a custom interval file (you may use this option many times)
-        -chr <string>           : Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output.
-        -s,  -stats             : Name of stats file (summary). Default is 'snpEff_summary.html'
-        -t                      : Use multiple threads (implies '-noStats'). Default 'off'
-
-Sequence change filter options:
-        -del                    : Analyze deletions only
-        -ins                    : Analyze insertions only
-        -hom                    : Analyze homozygous variants only
-        -het                    : Analyze heterozygous variants only
-        -minQ X, -minQuality X  : Filter out variants with quality lower than X
-        -maxQ X, -maxQuality X  : Filter out variants with quality higher than X
-        -minC X, -minCoverage X : Filter out variants with coverage lower than X
-        -maxC X, -maxCoverage X : Filter out variants with coverage higher than X
-        -nmp                    : Only MNPs (multiple nucleotide polymorphisms)
-        -snp                    : Only SNPs (single nucleotide polymorphisms)
-
-Results filter options:
-        -fi  <bedFile>                  : Only analyze changes that intersect with the intervals specified in this file (you may use this option many times)
-        -no-downstream                  : Do not show DOWNSTREAM changes
-        -no-intergenic                  : Do not show INTERGENIC changes
-        -no-intron                      : Do not show INTRON changes
-        -no-upstream                    : Do not show UPSTREAM changes
-        -no-utr                         : Do not show 5_PRIME_UTR or 3_PRIME_UTR changes
-
-Annotations options:
-        -cancer                         : Perform 'cancer' comparissons (Somatic vs Germline). Default: false
-        -canon                          : Only use canonical transcripts.
-        -geneId                         : Use gene ID instead of gene name (VCF output). Default: false
-        -hgvs                           : Use HGVS annotations for amino acid sub-field. Default: false
-        -lof                            : Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.
-        -reg <name>                     : Regulation track to use (this option can be used add several times).
-        -oicr                           : Add OICR tag in VCF file. Default: false
-        -onlyReg                        : Only use regulation tracks.
-        -onlyTr <file.txt>              : Only use the transcripts in this file. Format: One transcript ID per line.
-        -sequenceOntolgy                : Use Sequence Ontolgy terms. Default: false
-        -ss, -spliceSiteSize <int>      : Set size for splice sites (donor and acceptor) in bases. Default: 2
-        -ud, -upDownStreamLen <int>     : Set upstream downstream interval length (in bases)
-
-Generic options:
-        -0                      : File positions are zero-based (same as '-inOffset 0 -outOffset 0')
-        -1                      : File positions are one-based (same as '-inOffset 1 -outOffset 1')
-        -c , -config            : Specify config file
-        -h , -help              : Show this help and exit
-        -if, -inOffset          : Offset input by a number of bases. E.g. '-inOffset 1' for one-based input files
-        -of, -outOffset         : Offset output by a number of bases. E.g. '-outOffset 1' for one-based output files
-        -noLog                  : Do not report usage statistics to server
-        -noStats                : Do not create stats (summary) file
-        -q , -quiet             : Quiet mode (do not show any messages or errors)
-        -v , -verbose           : Verbose mode
-
-	-->
-	<requirements>
-		<requirement type="package" version="3.3">snpEff</requirement>
-	</requirements>
-	<command>
-SNPEFF_DATA_DIR=`grep '^data_dir' \$SNPEFF_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`;
-eval "if [ ! -e \$SNPEFF_DATA_DIR/$genomeVersion ] ;
-then java -Xmx6G -jar \$SNPEFF_JAR_PATH/snpEff.jar download  -c \$SNPEFF_JAR_PATH/snpEff.config $genomeVersion ;
-fi";
-java -Xmx6G -jar \$SNPEFF_JAR_PATH/snpEff.jar eff -c \$SNPEFF_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength 
+<tool id="snpEff" name="SnpEff" version="3.4">
+    <description>Variant effect and annotation</description>
+    <requirements>
+        <requirement type="package" version="3.4">snpEff</requirement>
+    </requirements>
+    <command>
+java -Xmx6G -jar \$SNPEFF_JAR_PATH/snpEff.jar eff 
+-c \$SNPEFF_JAR_PATH/snpEff.config 
+-i $inputFormat -o $outputFormat -upDownStreamLen $udLength 
 #if $spliceSiteSize and $spliceSiteSize.__str__ != '':
   -spliceSiteSize $spliceSiteSize
 #end if
@@ -79,15 +17,11 @@
   $filterHomHet 
 #end if
 #if $annotations and $annotations.__str__ != '':
+  #echo " "
   #echo ' '.join($annotations.__str__.split(','))
 #end if
-#if $extra_annotations and $extra_annotations.__str__ != '':
-  #echo ' '.join($extra_annotations.__str__.split(','))
-#end if
-#if $regulation and $regulation.__str__ != '':
-  -reg #echo ' -reg '.join($regulation.__str__.split(','))#
-#end if
 #if $filterOut and $filterOut.__str__ != '':
+  #echo " "
   #echo ' '.join($filterOut.__str__.split(','))
 #end if
 #if str( $transcripts ) != 'None':
@@ -105,228 +39,296 @@
 #if $chr.__str__.strip() != '':
   -chr "$chr" 
 #end if
-  $noLog $genomeVersion $input > $snpeff_output 
+  $noLog 
+#if $snpDb.genomeSrc == 'cached':
+  -dataDir ${snpDb.genomeVersion.fields.path}
+  #if $snpDb.extra_annotations and $snpDb.extra_annotations.__str__ != '':
+    #echo " "
+    #echo ' '.join($snpDb.extra_annotations.__str__.split(','))
+  #end if
+  #if $snpDb.regulation and $snpDb.regulation.__str__ != '':
+    -reg #echo ' -reg '.join($snpDb.regulation.__str__.split(','))#
+  #end if
+  $snpDb.genomeVersion
+#elif $snpDb.genomeSrc == 'history':
+  -dataDir ${snpDb.snpeff_db.extra_files_path}
+  #if $snpDb.extra_annotations and $snpDb.extra_annotations.__str__ != '':
+    #set annotations = [' '] + $snpDb.extra_annotations.__str__.split(',')
+    #echo " "
+    #echo ' -'.join($annotations)
+  #end if
+  #if $snpDb.regulation and $snpDb.regulation.__str__ != '':
+    -reg #echo ' -reg '.join($snpDb.regulation.__str__.split(','))#
+  #end if
+  ${snpDb.snpeff_db.metadata.genome_version}
+#else 
+  -download
+  $snpDb.genome_version
+#end if
+$input > $snpeff_output 
 </command>
-	<inputs>
-		<param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>
-
-		<param name="inputFormat" type="select" label="Input format">
-			<option value="vcf" selected="true">VCF</option>
-			<option value="txt">Tabular (Deprecated)</option>
-			<option value="pileup">Pileup (Deprecated)</option>
-			<option value="bed">BED (Deprecated)</option>
-		</param>
-
-		<param name="outputFormat" type="select" label="Output format">
-			<option value="vcf" selected="true">VCF (only if input is VCF)</option>
-			<option value="txt">Tabular</option>
-			<option value="bed">BED</option>
-			<option value="bedAnn">BED Annotations</option>
-		</param>
-
-		<param name="genomeVersion" type="select" label="Genome">
-                        <!--GENOME	DESCRIPTION-->
-                        <options from_data_table="snpeff_genomedb"/>
-		</param>
+    <inputs>
+        <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/>
 
-		<param name="udLength" type="select" label="Upstream / Downstream length">
-			<option value="0">No upstream / downstream intervals (0 bases)</option>
-			<option value="200">200 bases</option>
-			<option value="500">500 bases</option>
-			<option value="1000">1000 bases</option>
-			<option value="2000">2000 bases</option>
-			<option value="5000" selected="true">5000 bases</option>
-			<option value="10000">10000 bases</option>
-			<option value="20000">20000 bases</option>
-		</param>
+        <param name="inputFormat" type="select" label="Input format">
+            <option value="vcf" selected="true">VCF</option>
+            <option value="txt">Tabular (Deprecated)</option>
+            <option value="pileup">Pileup (Deprecated)</option>
+            <option value="bed">BED (Deprecated)</option>
+        </param>
 
-		<param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases. Default: 2">
-			<option value="1">1 base</option>
-			<option value="2">2 bases</option>
-			<option value="3">3 bases</option>
-			<option value="4">4 bases</option>
-			<option value="5">5 bases</option>
-			<option value="6">6 bases</option>
-			<option value="7">7 bases</option>
-			<option value="8">8 bases</option>
-			<option value="9">9 bases</option>
-		</param>
-
-        	<param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes">
-			<option value="no_filter" selected="true">No filter (analyze everything)</option>
-			<option value="-hom">Analyze homozygous sequence changes only </option>
-			<option value="-het">Analyze heterozygous sequence changes only </option>
-        	</param>
+        <param name="outputFormat" type="select" label="Output format">
+            <option value="vcf" selected="true">VCF (only if input is VCF)</option>
+            <option value="txt">Tabular</option>
+            <option value="bed">BED</option>
+            <option value="bedAnn">BED Annotations</option>
+        </param>
 
-                <!-- The tool testing code can not handle select,radio,check boxes values that start with '-', so the '-' is added in the command generation -->
-        	<param name="filterIn" type="select" display="radio" label="Filter sequence changes">
-			<option value="no_filter" selected="true">No filter (analyze everything)</option>
-			<option value="-del">Analyze deletions only </option>
-			<option value="-ins">Analyze insertions only </option>
-			<option value="-mnp">Only MNPs (multiple nucleotide polymorphisms) </option>
-			<option value="-snp">Only SNPs (single nucleotide polymorphisms) </option>
-        	</param>
-
-        	<param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options">
-			<option value="-cancer">Perform 'cancer' comparissons (Somatic vs Germline). Default: false</option>
-                        <option value="-canon">Only use canonical transcripts.</option>
-                        <option value="-geneId">Use gene ID instead of gene name (VCF output). Default: false</option>
-                        <option value="-hgvs">Use HGVS annotations for amino acid sub-field. Default: false</option>
-                        <option value="-lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.</option>
-                        <option value="-oicr">Add OICR tag in VCF file. Default: false</option>
-                        <option value="-onlyReg">Only use regulation tracks.</option>
-                        <option value="-sequenceOntolgy">Use Sequence Ontolgy terms. Default: false</option>
-        	</param>
-
-        	<param name="extra_annotations" type="select" display="checkboxes" multiple="true" label="Additional Annotations">
+        <conditional name="snpDb">
+            <param name="genomeSrc" type="select" label="">
+                <option value="cached">Locally cached</option>
+                <option value="history">history</option>
+                <option value="named">named on demand</option>
+            </param>
+            <when value="cached">
+                <param name="genomeVersion" type="select" label="Genome">
+                    <!--GENOME    DESCRIPTION-->
+                    <options from_data_table="snpeff_genomedb"/>
+                </param>
+                <param name="extra_annotations" type="select" display="checkboxes" multiple="true" label="Additional Annotations">
                        <help>These are available for only a few genomes</help>
-                       <!--GENOME	REG_NAME  -->
                        <options from_data_table="snpeff_annotations">
-                               <filter type="param_value" ref="genomeVersion" key="genome" column="0" />
+                           <filter type="param_value" ref="genomeVersion" key="genome" column="0" />
+                       </options>
+                </param>
+                <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory Annotation">
+                       <help>These are available for only a few genomes</help>
+                       <options from_data_table="snpeff_regulationdb">
+                           <filter type="param_value" ref="genomeVersion" key="genome" column="0" />
                        </options>
-        	</param>
-
-        	<param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory Annotation">
-                       <help>These are available for only a few genomes</help>
-                       <!--GENOME	REG_NAME  -->
-                       <options from_data_table="snpeff_regulationdb">
-                               <filter type="param_value" ref="genomeVersion" key="genome" column="0" />
-                       </options>
-        	</param>
-
-        	<param name="intervals" format="bed" type="data" optional="true" label="Use custom interval file for annotation"/>
-        	<param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file. Format: One transcript ID per line."/>
+                </param>
+            </when>
+            <when value="history">
+                <param format="snpeffdb" name="snpeff_db" type="data" label="SnpEff Genome Version Data"/>
+                <!-- From metadata -->
+                <param name="extra_annotations" type="select" display="checkboxes" multiple="true" label="Additional Annotations">
+                    <help>These are available for only a few genomes</help>
+                    <options>
+                        <filter type="data_meta" ref="snpeff_db" key="annotation" />
+                    </options>
+                </param>
+                <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory Annotation">
+                    <help>These are available for only a few genomes</help>
+                    <options>
+                        <filter type="data_meta" ref="snpeff_db" key="regulation" />
+                    </options>
+                </param>
+            </when>
+            <when value="named">
+                <param name="genome_version" type="text" value="GRCh37.68" label="Snpff Version Name"/>
+            </when>
+        </conditional>
 
-        	<param name="filterOut" type="select" display="checkboxes" multiple="true" label="Filter output">
-			<option value="-no-downstream">Do not show DOWNSTREAM changes </option>
-			<option value="-no-intergenic">Do not show INTERGENIC changes </option>
-			<option value="-no-intron">Do not show INTRON changes </option>
-			<option value="-no-upstream">Do not show UPSTREAM changes </option>
-			<option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option>
-        	</param>
+        <param name="udLength" type="select" label="Upstream / Downstream length">
+            <option value="0">No upstream / downstream intervals (0 bases)</option>
+            <option value="200">200 bases</option>
+            <option value="500">500 bases</option>
+            <option value="1000">1000 bases</option>
+            <option value="2000">2000 bases</option>
+            <option value="5000" selected="true">5000 bases</option>
+            <option value="10000">10000 bases</option>
+            <option value="20000">20000 bases</option>
+        </param>
 
-        	<param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
-			<option value="" selected="true">Use default (based on input type)</option>
-			<option value="-0">Force zero-based positions (both input and output)</option>
-			<option value="-1">Force one-based positions (both input and output)</option>
-		</param>
-        	<param name="chr" type="text" optionl="true" label="Text to prepend to chromosome name" help="By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'.  You can prepend any string you want to the chromosome name.">
-                                       <validator type="regex" message="No whitespace allows">^\S*$</validator>
+        <param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases. Default: 2">
+            <option value="1">1 base</option>
+            <option value="2">2 bases</option>
+            <option value="3">3 bases</option>
+            <option value="4">4 bases</option>
+            <option value="5">5 bases</option>
+            <option value="6">6 bases</option>
+            <option value="7">7 bases</option>
+            <option value="8">8 bases</option>
+            <option value="9">9 bases</option>
+        </param>
+
+        <param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes">
+            <option value="no_filter" selected="true">No filter (analyze everything)</option>
+            <option value="-hom">Analyze homozygous sequence changes only </option>
+            <option value="-het">Analyze heterozygous sequence changes only </option>
+        </param>
+
+        <!-- The tool testing code can not handle select,radio,check boxes values that start with '-', so the '-' is added in the command generation -->
+        <param name="filterIn" type="select" display="radio" label="Filter sequence changes">
+            <option value="no_filter" selected="true">No filter (analyze everything)</option>
+            <option value="-del">Analyze deletions only </option>
+            <option value="-ins">Analyze insertions only </option>
+            <option value="-mnp">Only MNPs (multiple nucleotide polymorphisms) </option>
+            <option value="-snp">Only SNPs (single nucleotide polymorphisms) </option>
+        </param>
 
-		</param>
-        	<param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/>
-        	<param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Do not report usage statistics to server"/>
-	</inputs>
-	<outputs>
-		<data format="vcf" name="snpeff_output" >
-			<change_format>
-				<when input="outputFormat" value="vcf" format="vcf" />
-				<when input="outputFormat" value="txt" format="tabular" />
-				<when input="outputFormat" value="bed" format="bed" />
-				<when input="outputFormat" value="bedAnn" format="bed" />
-			</change_format>
-		</data>
-                
-		<data format="html" name="statsFile">
-                    <filter>generate_stats == True</filter>
-                </data>
-	</outputs>
-        <stdio>
-          <exit_code range="1:"  level="fatal"   description="Error" />
-          <exit_code range="-1"  level="fatal"   description="Error: Cannot open file" />
-        </stdio>
-        <tests>
-            <test>
-                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-                <param name="inputFormat" value="vcf"/>
-                <param name="outputFormat" value="vcf"/>
-                <param name="genomeVersion" value="testCase"/>
-                <param name="udLength" value="0"/>
-                <param name="filterHomHet" value="no_filter"/>
-                <param name="filterIn" value="no_filter"/>
-                <param name="generate_stats" value="False"/>
-                <param name="filterOut" value="+-no-upstream"/>
-                <output name="snpeff_output">
-                    <assert_contents>
-                        <!-- Check that an effect was added -->
-                        <has_text text="EFF=" />
-                    </assert_contents>
-                </output>
-                        <!-- Check for a HTML header indicating that this was successful -->
-                <!--
-                <output name="statsFile">
-                    <assert_contents>
-                        <has_text text="SnpEff: Variant analysis" />
-                    </assert_contents>
-                </output>
-                --> 
-            </test>
+        <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options">
+            <option value="-cancer">Perform 'cancer' comparissons (Somatic vs Germline). Default: false</option>
+            <option value="-canon">Only use canonical transcripts.</option>
+            <option value="-geneId">Use gene ID instead of gene name (VCF output). Default: false</option>
+            <option value="-hgvs">Use HGVS annotations for amino acid sub-field. Default: false</option>
+            <option value="-lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.</option>
+            <option value="-oicr">Add OICR tag in VCF file. Default: false</option>
+            <option value="-onlyReg">Only use regulation tracks.</option>
+            <option value="-sequenceOntolgy">Use Sequence Ontology terms. Default: false</option>
+        </param>
+        <param name="intervals" format="bed" type="data" optional="true" label="Use custom interval file for annotation"/>
+        <param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file. Format: One transcript ID per line."/>
+        <param name="filterOut" type="select" display="checkboxes" multiple="true" label="Filter output">
+            <option value="-no-downstream">Do not show DOWNSTREAM changes </option>
+            <option value="-no-intergenic">Do not show INTERGENIC changes </option>
+            <option value="-no-intron">Do not show INTRON changes </option>
+            <option value="-no-upstream">Do not show UPSTREAM changes </option>
+            <option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option>
+        </param>
+
+        <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position">
+            <option value="" selected="true">Use default (based on input type)</option>
+            <option value="-0">Force zero-based positions (both input and output)</option>
+            <option value="-1">Force one-based positions (both input and output)</option>
+        </param>
+        <param name="chr" type="text" optional="true" label="Text to prepend to chromosome name">
+              <help>
+               By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'.  
+               You can prepend any string you want to the chromosome name.
+              </help>
+                       <validator type="regex" message="No whitespace allowed">^\S*$</validator>
 
-            <test>
-                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-                <param name="inputFormat" value="vcf"/>
-                <param name="outputFormat" value="vcf"/>
-                <param name="genomeVersion" value="testCase"/>
-                <param name="udLength" value="0"/>
-                <param name="filterHomHet" value="+-het"/>
-                <param name="filterIn" value="no_filter"/>
-                <!--
-                <param name="filterOut" value=""/>
-                -->
-                <param name="generate_stats" value="False"/>
-                <output name="snpeff_output">
-                    <assert_contents>
-                        <!-- Check that NO effects were added since -het is set -->
-                        <not_has_text text="EFF=NON_SYNONYMOUS_CODING" />
-                    </assert_contents>
-                </output>
-            </test>
-            <test>
-                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-                <param name="inputFormat" value="vcf"/>
-                <param name="outputFormat" value="vcf"/>
-                <param name="genomeVersion" value="testCase"/>
-                <param name="udLength" value="0"/>
-                <param name="filterHomHet" value="no_filter"/>
-                <param name="filterIn" value="del"/>
-                <!--
-                <param name="filterOut" value=""/>
-                -->
-                <param name="generate_stats" value="False"/>
-                <output name="snpeff_output">
-                    <assert_contents>
-                        <!-- Check that deleletions were evaluated -->
-                        <has_text_matching expression="Y\t59030478\t.*EFF=INTERGENIC" />
-                        <!-- Check that insertion on last line was NOT evaluated -->
-                        <has_text_matching expression="Y\t59032947\t.*SF=5\tGT" />
-                    </assert_contents>
-                </output>
-            </test>
-            <test>
-                <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
-                <param name="inputFormat" value="vcf"/>
-                <param name="outputFormat" value="vcf"/>
-                <param name="genomeVersion" value="testCase"/>
-                <param name="udLength" value="0"/>
-                <param name="filterHomHet" value="no_filter"/>
-                <param name="filterIn" value="no_filter"/>
-                <param name="filterOut" value="+-no-upstream"/>
-                <param name="generate_stats" value="False"/>
-                <output name="snpeff_output">
-                    <assert_contents>
-                        <!-- Check that NO UPSTREAM  effect was added -->
-                        <not_has_text text="UPSTREAM" />
-                    </assert_contents>
-                </output>
-            </test>
+        </param>
+        <param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/>
+        <param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Do not report usage statistics to server"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="snpeff_output" >
+            <change_format>
+                <when input="outputFormat" value="vcf" format="vcf" />
+                <when input="outputFormat" value="txt" format="tabular" />
+                <when input="outputFormat" value="bed" format="bed" />
+                <when input="outputFormat" value="bedAnn" format="bed" />
+            </change_format>
+        </data>
+        
+        <data format="html" name="statsFile">
+            <filter>generate_stats == True</filter>
+        </data>
+    </outputs>
+    <stdio>
+      <exit_code range="1:"  level="fatal"   description="Error" />
+      <exit_code range="-1"  level="fatal"   description="Error: Cannot open file" />
+    </stdio>
+    <tests>
+        <!-- Check that an effect was added in the output VCF -->
+        <!-- Check for a HTML header indicating that this was successful -->
+        <!--
+        <output name="statsFile">
+            <assert_contents>
+            <has_text text="SnpEff: Variant analysis" />
+            </assert_contents>
+        </output>
+        --> 
+        <!-- Setting filterOut throws exception in twilltestcase.py
+        <test>
+        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
+        <param name="inputFormat" value="vcf"/>
+        <param name="outputFormat" value="vcf"/>
+        <param name="genomeSrc" value="named"/>
+        <param name="genome_version" value="testCase"/>
+        <param name="udLength" value="0"/>
+        <param name="filterHomHet" value="no_filter"/>
+        <param name="filterIn" value="no_filter"/>
+        <param name="generate_stats" value="False"/>
+        <param name="filterOut" value="+-no-upstream"/>
+        <output name="snpeff_output">
+            <assert_contents>
+            <has_text text="EFF=" />
+            </assert_contents>
+        </output>
+        </test>
+        --> 
 
-        </tests>
-	<help>
+        <test>
+        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
+        <param name="inputFormat" value="vcf"/>
+        <param name="outputFormat" value="vcf"/>
+        <param name="genomeSrc" value="named"/>
+        <param name="genome_version" value="testCase"/>
+        <param name="udLength" value="0"/>
+        <param name="filterHomHet" value="+-het"/>
+        <param name="filterIn" value="no_filter"/>
+        <!--
+        <param name="filterOut" value=""/>
+        -->
+        <param name="generate_stats" value="False"/>
+        <output name="snpeff_output">
+            <assert_contents>
+            <!-- Check that NO effects were added since -het is set -->
+            <not_has_text text="EFF=NON_SYNONYMOUS_CODING" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <test>
+        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
+        <param name="inputFormat" value="vcf"/>
+        <param name="outputFormat" value="vcf"/>
+        <param name="genomeSrc" value="named"/>
+        <param name="genome_version" value="testCase"/>
+        <param name="udLength" value="0"/>
+        <param name="filterHomHet" value="no_filter"/>
+        <param name="filterIn" value="del"/>
+        <!--
+        <param name="filterOut" value=""/>
+        -->
+        <param name="generate_stats" value="False"/>
+        <output name="snpeff_output">
+            <assert_contents>
+            <!-- Check that deletions were evaluated -->
+            <has_text_matching expression="Y\t59030478\t.*EFF=INTERGENIC" />
+            <!-- Check that insertion on last line was NOT evaluated -->
+            <has_text_matching expression="Y\t59032947\t.*SF=5\tGT" />
+            </assert_contents>
+        </output>
+        </test>
+
+        <!-- Check that NO UPSTREAM  effect was added -->
+        <!-- Setting filterOut throws exception in twilltestcase.py
+        <test>
+        <param name="input" ftype="vcf" value="vcf_homhet.vcf"/>
+        <param name="inputFormat" value="vcf"/>
+        <param name="outputFormat" value="vcf"/>
+        <param name="genomeSrc" value="named"/>
+        <param name="genome_version" value="testCase"/>
+        <param name="udLength" value="0"/>
+        <param name="filterHomHet" value="no_filter"/>
+        <param name="filterIn" value="no_filter"/>
+        <param name="filterOut" value="+-no-upstream"/>
+        <param name="generate_stats" value="False"/>
+        <output name="snpeff_output">
+            <assert_contents>
+            <not_has_text text="UPSTREAM" />
+            </assert_contents>
+        </output>
+        </test>
+        -->
+
+    </tests>
+    <help>
 
 This tool calculate the effect of variants (SNPs/MNPs/Insertions) and deletions.
 
 For details about this tool, please go to http://snpEff.sourceforge.net
 
-	</help>
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+
+    </help>
 </tool>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff_download.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -0,0 +1,34 @@
+<tool id="snpEff_download" name="SnpEff Download" version="3.4">
+    <description>Download a new database</description>
+    <requirements>
+        <requirement type="package" version="3.4">snpEff</requirement>
+    </requirements>
+    <command>
+    echo $genomeVersion > $snpeff_db; 
+    java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config -dataDir $snpeff_db.extra_files_path -v $genomeVersion > $logfile 
+    </command>
+    <inputs>
+        <param name="genomeVersion" type="select" label="Genome Version">
+            <options from_data_table="snpeff_databases"/>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="txt" name="logfile" />
+        <data format="snpeffdb" name="snpeff_db" label="${genomeVersion}" />
+    </outputs>
+    <stdio>
+        <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
+        <exit_code range="1:"  level="fatal"   description="Error" />
+     </stdio>
+    <help>
+
+This tool downloads a SnpEff database into the user's history for use by snpEff.
+
+For details about this tool, please go to http://snpEff.sourceforge.net
+
+Please cite:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+    </help>
+</tool>
+
--- a/snpSift_annotate.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/snpSift_annotate.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,11 +1,11 @@
-<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.3">
+<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.4">
 	<description>Annotate SNPs from dbSnp</description>
 	<!-- 
 	    You will need to change the path to wherever your installation is.
 		You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
 	-->
 	<requirements>
-                <requirement type="package" version="3.3">snpEff</requirement>
+                <requirement type="package" version="3.4">snpEff</requirement>
 	</requirements>
 	<command>
         java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar $annotate_cmd 
@@ -20,18 +20,22 @@
 		<param format="vcf" name="input" type="data" label="VCF input"/>
 		<param format="vcf" name="dbSnp" type="data" label="VCF File with ID field annotated (e.g. dnSNP.vcf)" 
                        help="The ID field for a variant in input will be assigned from a matching variant in this file."/>
-		<param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Annotate in Memory" 
-                       help="allows unsorted VCF files, but it loads the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files)"/>
                 <conditional name="annotate">
 			<param name="id" type="boolean" truevalue="id" falsevalue="info" checked="True" label="Only annotate ID field (do not add INFO field)" help=""/>
 			<when value="id"/>
 			<when value="info">
-				<param name="info_ids" type="text" value="" optional="true" label="Limit INFO annotation to these INFO IDs"
+				<param name="info_ids" type="text" value="" size="60" optional="true" label="Limit INFO annotation to these INFO IDs"
                                        help="list is a comma separated list of fields. When blank, all INFO fields are included">	
 					<validator type="regex" message="IDs separted by commas">^(([a-zA-Z][a-zA-Z0-9_-]*)(,[a-zA-Z][a-zA-Z0-9_-]*)*)?$</validator>
 				</param>
 			</when>
                 </conditional>
+		<param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Annotate in Memory"> 
+                       <help>
+                       Allows unsorted VCF files, but it loads the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files).
+                       Otherwise, both the database and the input VCF files should be sorted by position (Chromosome sort order can differ between files). 
+                       </help>
+                </param>
 	</inputs>
         <stdio>
           <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
@@ -60,6 +64,44 @@
 
 For details about this tool, please go to http://snpeff.sourceforge.net/SnpSift.html#annotate
 
+Annotating only the ID field from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192
+
+
+
+Annotating both the ID and INFO fields from dbSnp137.vcf ::
+
+    Input VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    .            T    C    0.0    FAIL    NS=244
+    22      16350245    .            C    A    0.0    FAIL    NS=192
+
+    Annotated Output VCF:
+    #CHROM  POS         ID           REF  ALT  QUAL   FILTER  INFO
+    22      16157571    .            T    G    0.0    FAIL    NS=53
+    22      16346045    rs56234788   T    C    0.0    FAIL    NS=244;RSPOS=16346045;GMAF=0.162248628884826;dbSNPBuildID=129;SSR=0;SAO=0;VP=050100000000000100000100;WGT=0;VC=SNV;SLO;GNO
+    22      16350245    rs2905295    C    A    0.0    FAIL    NS=192;RSPOS=16350245;GMAF=0.230804387568556;dbSNPBuildID=101;SSR=1;SAO=0;VP=050000000000000100000140;WGT=0;VC=SNV;GNO
+
+
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+
+
 	</help>
 </tool>
 
--- a/snpSift_caseControl.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/snpSift_caseControl.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,11 +1,11 @@
-<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.3">
+<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.4">
   <description>Count samples are in 'case' and 'control' groups.</description>
   <!-- 
     You will need to change the path to wherever your installation is.
     You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
   -->
   <requirements>
-                <requirement type="package" version="3.3">snpEff</requirement>
+                <requirement type="package" version="3.4">snpEff</requirement>
   </requirements>
   <command>
     java -Xmx1G -jar \$SNPEFF_JAR_PATH/SnpSift.jar caseControl -q 
@@ -121,5 +121,13 @@
 
 For details about this tool, please go to http://snpeff.sourceforge.net/SnpSift.html#casecontrol
 
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+
+
+
   </help>
 </tool>
--- a/snpSift_filter.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/snpSift_filter.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,98 +1,123 @@
-<tool id="snpSift_filter" name="SnpSift Filter" version="3.3">
-	<options sanitize="False" />
-	<description>Filter variants using arbitrary expressions</description>
-	<requirements>
-                <requirement type="package" version="3.3">snpEff</requirement>
-	</requirements>
-	<command>
-		java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse $pass 
-                #if $filterId and len($filterId.__str__.strip()) > 0:
-			--filterId = "$filterId"
-                #end if
-                #if $addFilter and len($addFilter.__str__.strip()) > 0:
-			--addFilter = "$addFilter"
-                #end if
-                #if $rmFilter and len($rmFilter.__str__.strip()) > 0:
-			--rmFilter = "$rmFilter"
+<tool id="snpSift_filter" name="SnpSift Filter" version="3.4">
+    <options sanitize="False" />
+    <description>Filter variants using arbitrary expressions</description>
+    <requirements>
+        <requirement type="package" version="3.4">snpEff</requirement>
+    </requirements>
+    <command>
+        java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse 
+        #if $filtering.mode == 'field':
+            #if $filtering.replace.pass:
+                --pass
+                #if $filtering.replace.filterId and len($filtering.replace.filterId.__str__.strip()) > 0:
+                    --filterId "$filtering.replace.filterId"
                 #end if
- 		> $output
-	</command>
-	<inputs>
-		<param format="vcf" name="input" type="data" label="VCF input"/>
-		<param name="expr" type="text" label="Expression" size="120"/>
-		<param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse. Show lines that do not match filter expression"/>
-		<param name="pass" type="boolean" truevalue="--pass" falsevalue="" checked="false" label="Use 'PASS' field instead of filtering out VCF entries"/>
-		<param name="filterId" type="text" value="" optional="true" label="ID for this filter (##FILTER tag in header and FILTER VCF field)." size="10"/>
-		<param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
-		<param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
-	</inputs>
-	<configfiles>
-		<configfile name="exprFile">
-		$expr
-		</configfile> 
-	</configfiles>
-
-	<outputs>
-		<data format="vcf" name="output" />
-	</outputs>
-        <stdio>
-          <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
-          <exit_code range="1:"  level="fatal"   description="Error" />
-        </stdio>
-
-        <tests>
+            #end if
+            #if $filtering.addFilter and len($filtering.addFilter.__str__.strip()) > 0:
+                --addFilter "$filtering.addFilter"
+            #end if
+            #if $filtering.rmFilter and len($filtering.rmFilter.__str__.strip()) > 0:
+                --rmFilter "$filtering.rmFilter"
+            #end if
+        #end if
+         > $output
+    </command>
+    <inputs>
+        <param format="vcf" name="input" type="data" label="VCF input"/>
+        <param name="expr" type="text" label="Expression" size="160"/>
+        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse. Show lines that do not match filter expression"/>
+        <conditional name="filtering">
+            <param name="mode" type="select" label="Filter mode">
+                <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option>
+                <option value="field">Change the FILTER field, but retain all entries</option>
+            </param> 
+            <when value="entries"/>
+            <when value="field">
+                <conditional name="replace">
+                    <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" 
+                           help="appends an ID tag to non-matching entry FILTER "/>
+                    <when value="no"/>
+                    <when value="yes">
+                        <param name="filterId" type="text" value="" optional="true" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." size="10"
+                               help="Default ID is 'SnpSift'"/>
+                    </when>
+                </conditional>
+                <param name="addFilter" type="text" value="" optional="true" label="Add a string to FILTER VCF field if 'expression' is true." size="10"/>
+                <param name="rmFilter" type="text" value="" optional="true" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." size="10"/>
+            </when>
+        </conditional>
+    </inputs>
+    <configfiles>
+        <configfile name="exprFile">
+        $expr
+        </configfile> 
+    </configfiles>
 
-            <test>
-                <param name="input" ftype="vcf" value="test01.vcf"/>
-                <param name="expr" value="QUAL >= 50"/>
-                <output name="output">
-                    <assert_contents>
-                        <has_text text="28837706" />
-                        <not_has_text text="NT_166464" />
-                    </assert_contents>
-                </output>
-            </test>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <stdio>
+      <exit_code range=":-1"  level="fatal"   description="Error: Cannot open file" />
+      <exit_code range="1:"  level="fatal"   description="Error" />
+    </stdio>
+
+    <tests>
 
-            <test>
-                <param name="input" ftype="vcf" value="test01.vcf"/>
-                <param name="expr" value="(CHROM = '19')"/>
-                <output name="output">
-                    <assert_contents>
-                        <has_text text="3205820" />
-                        <not_has_text text="NT_16" />
-                    </assert_contents>
-                </output>
-            </test>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="QUAL >= 50"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="28837706" />
+            <not_has_text text="NT_166464" />
+            </assert_contents>
+        </output>
+        </test>
 
-            <test>
-                <param name="input" ftype="vcf" value="test01.vcf"/>
-                <param name="expr" value="(POS >= 20175) &amp; (POS &lt;= 35549)"/>
-                <output name="output">
-                    <assert_contents>
-                        <has_text text="20175" />
-                        <has_text text="35549" />
-                        <has_text text="22256" />
-                        <not_has_text text="18933" />
-                        <not_has_text text="37567" />
-                    </assert_contents>
-                </output>
-            </test>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(CHROM = '19')"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="3205820" />
+            <not_has_text text="NT_16" />
+            </assert_contents>
+        </output>
+        </test>
 
-            <test>
-                <param name="input" ftype="vcf" value="test01.vcf"/>
-                <param name="expr" value="( DP >= 5 )"/>
-                <output name="output">
-                    <assert_contents>
-                        <has_text text="DP=5;" />
-                        <has_text text="DP=6;" />
-                        <not_has_text text="DP=1;" />
-                    </assert_contents>
-                </output>
-            </test>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="(POS >= 20175) &amp; (POS &lt;= 35549)"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="20175" />
+            <has_text text="35549" />
+            <has_text text="22256" />
+            <not_has_text text="18933" />
+            <not_has_text text="37567" />
+            </assert_contents>
+        </output>
+        </test>
 
-        </tests>
+        <test>
+        <param name="input" ftype="vcf" value="test01.vcf"/>
+        <param name="expr" value="( DP >= 5 )"/>
+        <param name="mode" value="entries"/>
+        <output name="output">
+            <assert_contents>
+            <has_text text="DP=5;" />
+            <has_text text="DP=6;" />
+            <not_has_text text="DP=1;" />
+            </assert_contents>
+        </output>
+        </test>
 
-	<help>
+    </tests>
+
+    <help>
 
 **SnpSift filter**
 
@@ -123,5 +148,12 @@
 
 For complete details about this tool and epressions that can be used, please go to http://snpeff.sourceforge.net/SnpSift.html#filter
 
-	</help>
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+
+
+    </help>
 </tool>
--- a/snpSift_int.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/snpSift_int.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,11 +1,11 @@
-<tool id="snpSift_int" name="SnpSift Intervals" version="3.3">
+<tool id="snpSift_int" name="SnpSift Intervals" version="3.4">
 	<description>Filter variants using intervals </description>
 	<!-- 
 	    You will need to change the path to wherever your installation is.
 		You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
 	-->
 	<requirements>
-                <requirement type="package" version="3.3">snpEff</requirement>
+                <requirement type="package" version="3.4">snpEff</requirement>
 	</requirements>
 	<command>
 		java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar intervals -i $input $exclude $bedFile > $output
@@ -57,5 +57,11 @@
 
 For details about this tool, please go to http://snpeff.sourceforge.net/SnpSift.html#intervals
 
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
+
 	</help>
 </tool>
--- a/tool-data/snpeff_genomedb.loc.sample	Mon Oct 07 10:59:44 2013 -0500
+++ b/tool-data/snpeff_genomedb.loc.sample	Wed Nov 27 09:11:32 2013 -0600
@@ -1,5 +1,5 @@
 ## Downloaded Databases for SnpEff 
 ## These are from the list on: http://snpeff.sourceforge.net/download.html
 ## the Description field in this sample is "Genome : Version" 
-#Version        Description
-#GRCh37.68      Homo sapiens : GRCh37.68
+#Version        Description	data_dir path
+#GRCh37.68      Homo sapiens : GRCh37.68	/home/galaxy/snpEff/data
--- a/tool_data_table_conf.xml.sample	Mon Oct 07 10:59:44 2013 -0500
+++ b/tool_data_table_conf.xml.sample	Wed Nov 27 09:11:32 2013 -0600
@@ -4,12 +4,16 @@
         <file path="tool-data/snpeff_databases.loc" />
     </table>
     <table name="snpeff_genomedb" comment_char="#">
-        <columns>value, name</columns>
+        <columns>value, name, path</columns>
         <file path="tool-data/snpeff_genomedb.loc" />
     </table>
     <table name="snpeff_regulationdb" comment_char="#">
-        <columns>value, name</columns>
+        <columns>genome, value, name</columns>
         <file path="tool-data/snpeff_regulationdb.loc" />
     </table>
+    <table name="snpeff_annotations" comment_char="#">
+        <columns>genome, value, name</columns>
+        <file path="tool-data/snpeff_annotations.loc" />
+    </table>
 </tables>
 
--- a/tool_dependencies.xml	Mon Oct 07 10:59:44 2013 -0500
+++ b/tool_dependencies.xml	Wed Nov 27 09:11:32 2013 -0600
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="snpEff" version="3.3">
-        <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_snpeff_3_3" owner="jjohnson" changeset_revision="a462177619be" />
+    <package name="snpEff" version="3.4">
+        <repository toolshed="http://testtoolshed.g2.bx.psu.edu" name="package_snpeff_3_4" owner="jjohnson" changeset_revision="7fb1061d905a" />
     </package>
 </tool_dependency>