changeset 3:b5e03b2cb813 draft

Uploaded
author jjohnson
date Wed, 09 Dec 2015 14:07:32 -0500
parents c56e27adfbff
children 772825adad2f
files repository_dependencies.xml snpEff_macros.xml snpSift_dbnsfp.xml snpSift_macros.xml tool-data/snpsift_dbnsfp.loc.sample tool-data/snpsift_dbnsfps.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 8 files changed, 77 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/repository_dependencies.xml	Mon Nov 10 09:45:26 2014 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-<?xml version="1.0"?>
-<repositories description="This requires the SnpEff dbnsfp datatype definitions.">
-  <repository changeset_revision="df236b5e2985" name="snpsift_dbnsfp_datatypes" owner="jjohnson" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-</repositories>
--- a/snpEff_macros.xml	Mon Nov 10 09:45:26 2014 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<macros>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="4.0">snpEff</requirement>
-        </requirements>
-    </xml>
-  <xml name="stdio">
-    <stdio>
-        <exit_code range=":-1"  level="fatal" description="Error: Cannot open file" />
-        <exit_code range="1:"  level="fatal" description="Error" />
-    </stdio>
-  </xml>
-  <token name="@EXTERNAL_DOCUMENTATION@">
-
-For details about this tool, please go to:
-	http://snpeff.sourceforge.net/SnpEff_manual.html
-
-  </token>
-  <token name="@CITATION_SECTION@">------
-
-**Citation**
-
-For the underlying tool, please cite the following two publications:
-
-SnpEff citation:
-"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
-
-SnpSift citation:
-"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012.
-
-  </token>
-</macros>
--- a/snpSift_dbnsfp.xml	Mon Nov 10 09:45:26 2014 -0600
+++ b/snpSift_dbnsfp.xml	Wed Dec 09 14:07:32 2015 -0500
@@ -1,10 +1,12 @@
-<tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="4.0.0">
-    <description>Add Annotations from dbNSFP</description>
-    <expand macro="requirements" />
+<tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="@WRAPPER_VERSION@.0">
+    <description>Add Annotations from dbNSFP or similar annotation DBs</description>
     <macros>
-        <import>snpEff_macros.xml</import>
+        <import>snpSift_macros.xml</import>
     </macros>
-    <command>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command><![CDATA[
         java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar dbnsfp -v
         #if $db.dbsrc == 'cached' :
           -db $db.dbnsfp 
@@ -18,7 +20,8 @@
           #end if
         #end if          
         $input > $output  
-        2> tmp.err &amp;&amp; grep -v file tmp.err
+        2> tmp.err && grep -v file tmp.err
+]]>
     </command>
     <inputs>
         <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>
@@ -29,17 +32,17 @@
             </param>
             <when value="cached">
                 <param name="dbnsfp" type="select" label="Genome">
-                    <options from_data_table="snpsift_dbnsfp">
-                        <column name="name" index="1"/>
+                    <options from_data_table="snpsift_dbnsfps">
+                        <column name="name" index="2"/>
                         <column name="value" index="3"/>
                     </options>
                 </param>
                 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">
-                    <options from_data_table="snpsift_dbnsfp">
-                        <column name="name" index="3"/>
-                        <column name="value" index="3"/>
-                        <filter type="param_value" ref="dbnsfp" column="2" />
-                        <filter type="multiple_splitter" column="3" separator=","/>
+                    <options from_data_table="snpsift_dbnsfps">
+                        <column name="name" index="4"/>
+                        <column name="value" index="4"/>
+                        <filter type="param_value" ref="dbnsfp" column="3" />
+                        <filter type="multiple_splitter" column="4" separator=","/>
                     </options>
                 </param>
             </when>
@@ -53,7 +56,6 @@
             </when>
         </conditional>
     </inputs>
-    <expand macro="stdio" />
     <outputs>
         <data format="vcf" name="output" />
     </outputs>
@@ -70,9 +72,10 @@
             </output>
         </test>
     </tests>
-    <help>
+    <help><![CDATA[
 
 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).
+It contains variant annotations such as:
 
 
   1000Gp1_AC
@@ -124,7 +127,7 @@
   ESP6500_EA_AF
     Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
   FATHMM_pred
-    If a FATHMM_score is &lt;=-1.5 (or rankscore &lt;=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
+    If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
   FATHMM_rankscore
     FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1
   FATHMM_score
@@ -152,7 +155,7 @@
   LRT_Omega
     Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)
   LRT_converted_rankscore
-    LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega&lt;1, or LRTnew=LRTori*0.5 if Omega&gt;=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
+    LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
   LRT_pred
     LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score
   LRT_score
@@ -222,7 +225,7 @@
   SIFT_converted_rankscore
     SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932
   SIFT_pred
-    If SIFTori is smaller than 0.05 (rankscore&gt;0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
+    If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
   SIFT_score
     SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"
   SiPhy_29way_logOdds
@@ -241,7 +244,11 @@
     rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...
 
 
-The website for  dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP  and there is only annotation for human hg18 and hg19 genome builds. 
+The website for the dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP and it provides annotations for human genome builds only.
+
+The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation:
+*( It also provides links for dbNSFP databases prebuilt for SnpSift )*
+http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
 
 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it has::
 
@@ -265,10 +272,9 @@
 	4	100239319	T	G	H	P	ADH1B	0
 
 
-The uploaded tabular file should be set to datatype: "dbnsfp.tabular"
-Using "Convert Format" the "dbnsfp.tabular" can be converted to the correct format for SnpSift dbnsfp.
-
-The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation.
+The galaxy datatypes for dbNSFP can automatically convert the specially formatted tabular file for use by SnpSift dbNSFP:
+  1. Upload the tabular file, set the datatype as: **"dbnsfp.tabular"**
+  2. Edit the history dataset attributes (pencil icon): Use "Convert Format" to convert the **"dbnsfp.tabular"** to the correct format for SnpSift dbnsfp: **"snpsiftdbnsfp"**.
 
 
 @EXTERNAL_DOCUMENTATION@
@@ -276,6 +282,6 @@
 
 @CITATION_SECTION@
 
-
+]]>
     </help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpSift_macros.xml	Wed Dec 09 14:07:32 2015 -0500
@@ -0,0 +1,39 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="4.1">snpEff</requirement>
+        </requirements>
+    </xml>
+  <xml name="stdio">
+    <stdio>
+        <exit_code range=":-1"  level="fatal" description="Error: Cannot open file" />
+        <exit_code range="1:"  level="fatal" description="Error" />
+    </stdio>
+  </xml>
+  <xml name="version_command">
+    <version_command>java -jar $SNPEFF_JAR_PATH/SnpEff.jar -version</version_command>
+  </xml>
+  <token name="@WRAPPER_VERSION@">4.1</token>
+  <token name="@EXTERNAL_DOCUMENTATION@">
+
+For details about this tool, please go to:
+	http://snpeff.sourceforge.net/SnpEff_manual.html
+
+  </token>
+  <token name="@CITATION_SECTION@">------
+
+**Citation**
+
+For the underlying tool, please cite the following publication:
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et al., Frontiers in Genetics, 3:35, 2012.
+
+  </token>
+  <xml name="citations">
+      <citations>
+        <citation type="doi">10.3389/fgene.2012.00035</citation>
+        <yield />
+      </citations>
+  </xml>
+</macros>
--- a/tool-data/snpsift_dbnsfp.loc.sample	Mon Nov 10 09:45:26 2014 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-#id	build	description	path	annotations
-#GRCh37_dbNSFP2.4	GRCh37	GRCh37 dbNSFP2.4	/depot/snpeff/	SIFT_pred,Uniprot_acc
-#GRCh38_dbNSFP2.7	GRCh38	GRCh38 dbNSFP2.7	/depot/snpeff/	SIFT_pred,Uniprot_acc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/snpsift_dbnsfps.loc.sample	Wed Dec 09 14:07:32 2015 -0500
@@ -0,0 +1,3 @@
+#key	build	description	path	annotations
+#GRCh37_dbNSFP2.4	GRCh37	GRCh37 dbNSFP2.4	/depot/snpeff/dbNSFP2.4.txt.gz	SIFT_pred,Uniprot_acc
+#GRCh38_dbNSFP3.1c	GRCh38	GRCh38 dbNSFP3.1c	/depot/snpeff/dbNSFP3.1c.txt.gz	SIFT_pred,Uniprot_acc
--- a/tool_data_table_conf.xml.sample	Mon Nov 10 09:45:26 2014 -0600
+++ b/tool_data_table_conf.xml.sample	Wed Dec 09 14:07:32 2015 -0500
@@ -1,7 +1,7 @@
 <tables>
-    <table name="snpsift_dbnsfp" comment_char="#">
-        <columns>dbkey, build, name, value, annotations</columns>
-        <file path="tool-data/snpsift_dbnsfp.loc" />
+    <table name="snpsift_dbnsfps" comment_char="#">
+        <columns>key, build, name, value, annotations</columns>
+        <file path="tool-data/snpsift_dbnsfps.loc" />
     </table>
 </tables>
 
--- a/tool_dependencies.xml	Mon Nov 10 09:45:26 2014 -0600
+++ b/tool_dependencies.xml	Wed Dec 09 14:07:32 2015 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-  <package name="snpEff" version="4.0">
-      <repository changeset_revision="4ac635fc1781" name="package_snpeff_4_0" owner="jjohnson" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <package name="snpEff" version="4.1">
+        <repository changeset_revision="02a5e07a4121" name="package_snpeff_4_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>