# HG changeset patch # User Jim Johnson # Date 1421177771 21600 # Node ID 36f3c82323e6bdbfb71521d557499212a89c2d19 # Parent 13d81e5226cd8384662cacf540e3932b11c97c08 Use tool_data_table with key and version columns added to allow for multiple versions in a .loc file diff -r 13d81e5226cd -r 36f3c82323e6 readme.rst --- a/readme.rst Mon Nov 10 09:33:36 2014 -0600 +++ b/readme.rst Tue Jan 13 13:36:11 2015 -0600 @@ -1,20 +1,38 @@ -These are galaxy tools for SnpEff a variant annotation and effect prediction tool by Pablo Cingolani. +SnpEff wrappers +=============== + +These are galaxy tools for SnpEff_, a variant annotation and effect prediction tool by Pablo Cingolani. It annotates and predicts the effects of variants on genes (such as amino acid changes). -( http://snpeff.sourceforge.net/ ) + +.. _SnpEff: http://snpeff.sourceforge.net/ + + +This repository lets you automatically install SnpEff and SnpSift. +This will use the default location for genome reference downloads from the ``snpEff.config`` file: -This repository contains a tool_dependencies.xml file that will attempt to automatically install SnpEff and SnpSift. + data_dir = ~/snpEff/data/ + +You can manually edit the installed ``snpEff.config`` file and change the location, or you can create a symbolic link to the desired data location from ``~/snpEff``. + +The genome reference options used by the tools "SnpEff" (snpEff.xml) and "SnpEff Download" (snpEff_download.xml) are taken from the ``tool-data/snpeffect_genomedb.loc`` file. 
+You can fill this file by running the following command: -The genome reference options used by the tools: - "SnpEff" snpEff.xml - "SnpEff Download" snpEff_download.xml -are taken from: tool-data/snpeffect_genomedb.loc + java -jar snpEff.jar databases | tail -n +3 | cut -f 1,2 | awk '{ gsub(/_/, " ", $2); printf "%s\\t%s : %s\\n", $1, $2, $1 }' | sort -k 2 > snpeffect_genomedb.loc + +There are 2 datamanagers to download and install prebuilt SnpEff genome databases: + +* data_manager_snpeff_databases: generates a list of available SnpEff genome databases into the ``tool-data/snpeff_databases.loc`` file +* data_manager_snpeff_download: downloads a SnpEff genome database selected from ``tool-data/snpeff_databases.loc`` and adds entries to ``snpeff_genomedb.loc``, ``snpeff_regulationdb.loc`` and ``snpeff_annotations.loc`` + +SnpEff citation: |Cingolani2012program|_. -There are 2 datamanagers to download and install prebuilt SnpEff Genome databases: - data_manager_snpeff_databases - generates a list of available SnpEff genome databases into the tool-data/snpeff_databases.loc - data_manager_snpeff_download - downloads a SnpEff genome database selected from: tool-data/snpeff_databases.loc and adds entries to snpeff_genomedb.loc,snpeff_regulationdb.loc,snpeff_annotations.loc +.. |Cingolani2012program| replace:: Cingolani, P., Platts, A., Wang, L. L., Coon, M., Nguyen, T., Wang, L., Land, S. J., Lu, X., Ruden, D. M. (2012) A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of *Drosophila melanogaster* strain w1118; iso-2; iso-3. *Fly* 6(2):80-92 +.. _Cingolani2012program: https://www.landesbioscience.com/journals/fly/article/19695/ + +SnpSift citation: |Cingolani2012using|_. 
-SnpEff citation: -"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process] +.. |Cingolani2012using| replace:: Cingolani, P., Patel, V. M., Coon, M., Nguyen, T., Land, S. J., Ruden, D. M., Lu, X. (2012) Using *Drosophila melanogaster* as a model for genotoxic chemical mutational studies with a new program, SnpSift. *Front. Genet.* 3:35 +.. _Cingolani2012using: http://journal.frontiersin.org/Journal/10.3389/fgene.2012.00035/ -SnpSift citation: -"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012. +Wrapper authors: Jim Johnson + diff -r 13d81e5226cd -r 36f3c82323e6 repository_dependencies.xml --- a/repository_dependencies.xml Mon Nov 10 09:33:36 2014 -0600 +++ b/repository_dependencies.xml Tue Jan 13 13:36:11 2015 -0600 @@ -1,4 +1,5 @@ - + + diff -r 13d81e5226cd -r 36f3c82323e6 snpEff.xml --- a/snpEff.xml Mon Nov 10 09:33:36 2014 -0600 +++ b/snpEff.xml Tue Jan 13 13:36:11 2015 -0600 @@ -1,13 +1,14 @@ - + Variant effect and annotation snpEff_macros.xml + $snpeff_output + $input > $snpeff_output ; #if $statsFile: - && mkdir $statsFile.extra_files_path && cp snpeff_report.genes.txt $statsFile.extra_files_path + #import os + #set $genes_file = str($statsFile) + '.genes.txt' + #set $genes_file_name = os.path.split($genes_file)[-1] + mkdir $statsFile.files_path; + mv $genes_file #echo os.path.join($statsFile.files_path, $genes_file_name)#; #end if + #if $outputConditional.outputFormat == 'gatk' and $outputConditional.gatk_v1 + ## Replace real SnpEff version with 2.0.5 to prevent this GATK 1.x error: "The version of SnpEff used to generate the SnpEff input file (x.x) is not currently 
supported by the GATK. Supported versions are: [2.0.5]" + sed -i 's/^\#\#SnpEffVersion="\(\S*\s\)/\#\#SnpEffVersion="2.0.5 - real is \1/' $snpeff_output + #end if +]]> @@ -81,12 +91,22 @@ - - - - - - + + + + + + + + + + + + + + + + @@ -97,35 +117,36 @@ - - + + + - + These are available for only a few genomes - - + + - + These are available for only a few genomes - - + + - + - + These are available for only a few genomes - + These are available for only a few genomes @@ -134,7 +155,7 @@ - http://sourceforge.net/projects/snpeff/files/databases/v4_0/ + @SNPEFF_DATABASE_URL@ \S+ @@ -153,7 +174,7 @@ - + @@ -165,24 +186,24 @@ - - + + - - - - + + + + - + - + @@ -217,13 +238,12 @@ - - - - + + + - + generate_stats == True @@ -287,7 +307,7 @@ - + @@ -333,5 +353,6 @@ @CITATION_SECTION@ + diff -r 13d81e5226cd -r 36f3c82323e6 snpEff_databases.xml --- a/snpEff_databases.xml Mon Nov 10 09:33:36 2014 -0600 +++ b/snpEff_databases.xml Tue Jan 13 13:36:11 2015 -0600 @@ -1,11 +1,13 @@ - + snpEff_macros.xml + $snpeff_dbs +]]> diff -r 13d81e5226cd -r 36f3c82323e6 snpEff_download.xml --- a/snpEff_download.xml Mon Nov 10 09:33:36 2014 -0600 +++ b/snpEff_download.xml Tue Jan 13 13:36:11 2015 -0600 @@ -1,21 +1,22 @@ - + Download a new database snpEff_macros.xml - java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config -dataDir $snpeff_db.extra_files_path -v $genome_version > $logfile + - - http://sourceforge.net/projects/snpeff/files/databases/ + + @SNPEFF_DATABASE_URL@ \S+ - - + @@ -25,5 +26,6 @@ @CITATION_SECTION@ + diff -r 13d81e5226cd -r 36f3c82323e6 snpEff_macros.xml --- a/snpEff_macros.xml Mon Nov 10 09:33:36 2014 -0600 +++ b/snpEff_macros.xml Tue Jan 13 13:36:11 2015 -0600 @@ -10,6 +10,8 @@ + 4.0 + http://sourceforge.net/projects/snpeff/files/databases/v4_1/ For details about this tool, please go to: @@ -23,10 +25,13 @@ For the underlying tool, please cite the following two publications: SnpEff citation: -"A program for annotating and predicting the effects 
of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process] - -SnpSift citation: -"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et. al., Frontiers in Genetics, 3, 2012. +"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly 6(2):80-92, 2012 + + + 10.4161/fly.19695 + + + diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeff4_annotations.loc.sample --- a/tool-data/snpeff4_annotations.loc.sample Mon Nov 10 09:33:36 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -## Regulation Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -#genome annotation_name description -#GRCh37.71 nextprot nextprot -#GRCh37.71 motif motif diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeff4_databases.loc.sample --- a/tool-data/snpeff4_databases.loc.sample Mon Nov 10 09:33:36 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -## Available Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -## the Description field in this sample is "Genome : Version" -#Version Description -#GRCh37.68 Homo sapiens : GRCh37.68 diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeff4_genomedb.loc.sample --- a/tool-data/snpeff4_genomedb.loc.sample Mon Nov 10 09:33:36 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -## Downloaded Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -## the Description field 
in this sample is "Genome : Version" -#Version Description data_dir path -#GRCh37.68 Homo sapiens : GRCh37.68 /home/galaxy/snpEff/data diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeff4_regulationdb.loc.sample --- a/tool-data/snpeff4_regulationdb.loc.sample Mon Nov 10 09:33:36 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -## Regulation Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -#genome regulation_name description -#GRCh37.70 CD4 CD4 diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeffv_annotations.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_annotations.loc.sample Tue Jan 13 13:36:11 2015 -0600 @@ -0,0 +1,5 @@ +## Annotation Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +#key snpeff_version genome annotation_name description +#SnpEff4.0_GRCh37.75 SnpEff4.0 GRCh37.75 nextprot nextprot +#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 motif motif diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeffv_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_databases.loc.sample Tue Jan 13 13:36:11 2015 -0600 @@ -0,0 +1,5 @@ +## Available Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +## the Description field in this sample is "Genome : Version" +#key snpeff_version Version Description +#SnpEff4.0_GRCh37.75 SnpEff4.0 GRCh37.75 Homo sapiens : GRCh37.75 diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeffv_genomedb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_genomedb.loc.sample Tue Jan 13 13:36:11 2015 -0600 @@ -0,0 +1,6 @@ +## Downloaded Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +## the Description field in this sample is "Genome : Version" +#Key snpeff_version Version Description data_dir path +#SnpEff4.0_GRCh37.74 SnpEff4.0 GRCh37.74 
Homo sapiens : GRCh37.74 /home/galaxy/snpEff/v4_0/data +#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 Homo sapiens : GRCh38.76 /home/galaxy/snpEff/v4_1/data diff -r 13d81e5226cd -r 36f3c82323e6 tool-data/snpeffv_regulationdb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_regulationdb.loc.sample Tue Jan 13 13:36:11 2015 -0600 @@ -0,0 +1,5 @@ +## Regulation Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +#Key snpeff_version genome regulation_name description +#SnpEff4.0_GRCh37.74 SnpEff4.0 GRCh37.74 CD4 CD4 +#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 CD4 CD4 diff -r 13d81e5226cd -r 36f3c82323e6 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Mon Nov 10 09:33:36 2014 -0600 +++ b/tool_data_table_conf.xml.sample Tue Jan 13 13:36:11 2015 -0600 @@ -1,19 +1,19 @@ - - value, name - +
+ key, version, value, name +
- - value, name, path - +
+ key, version, value, name, path +
- - genome, value, name - +
+ key, version, genome, value, name +
- - genome, value, name - +
+ key, version, genome, value, name +