Mercurial > repos > jjohnson > snpeff
changeset 2:6ad9205c1307
Update to SnpEff version 3.3
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 04 Oct 2013 09:09:29 -0500 |
parents | bea875f081e8 |
children | a8f4ebfb8253 |
files | README data_manager/data_manager_snpEff_databases.py data_manager/data_manager_snpEff_databases.xml data_manager/data_manager_snpEff_download.py data_manager/data_manager_snpEff_download.xml data_manager_conf.xml snpEff.xml snpSift_annotate.xml snpSift_caseControl.xml snpSift_filter.xml snpSift_int.xml tool-data/snpeff_annotations.loc.sample tool-data/snpeff_regulationdb.loc.sample |
diffstat | 13 files changed, 115 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/README Thu Jul 04 09:49:15 2013 -0500 +++ b/README Fri Oct 04 09:09:29 2013 -0500 @@ -1,15 +1,18 @@ -These are galaxy tools for Snp Effect ( http://snpeff.sourceforge.net/ ) +These are galaxy tools for SnpEff ( http://snpeff.sourceforge.net/ ) + +This repository contains a tool_dependencies.xml file that will attempt to automatically install SnpEff and SnpSift. + +This will use the default location for genome reference downloads from the snpEff.config: +data_dir = ~/snpEff/data/ +You can manually edit the installed snpEff.config and change the location, or you can create a symbolic link to the desired data location from ~/snpEff. + The genome reference options used by the tools: "SnpEff" snpEff.xml "SnpEff Download" snpEff_download.xml are taken from: tool-data/snpeffect_genomedb.loc -The tool-data/snpeffect_genomedb.loc.sample file has the genomes references from the SnpEffect dwnloads page: -http://snpeff.sourceforge.net/download.html -The values for snpeffect_genomedb.loc.sample were populated by: -java -jar snpEff.jar cfg2table galaxy | grep 'option' | sed 's/^.*value="\([^"]*\)">\([^<]*\).*$/\1#\2/' | tr '#' '\t' >> snpeffect_genomedb.loc.sample +There are 2 datamanagers to download and install prebuilt SnpEff Genome databases: + data_manager_snpeff_databases - generates a list of available SnpEff genome databases into the tool-data/snpeff_databases.loc + data_manager_snpeff_download - downloads a SnpEff genome database selected from: tool-data/snpeff_databases.loc and adds entries to snpeff_genomedb.loc,snpeff_regulationdb.loc,snpeff_annotations.loc -This repository contains a tool_dependencies.xml file that will allow SnpEff and SnpSift to be automatically installed. -This will use the default location for genome reference downloads from the snpEff.config: -data_dir = ~/snpEff/data/
--- a/data_manager/data_manager_snpEff_databases.py Thu Jul 04 09:49:15 2013 -0500 +++ b/data_manager/data_manager_snpEff_databases.py Fri Oct 04 09:09:29 2013 -0500 @@ -46,7 +46,7 @@ genome_version = fields[0].strip() if genome_version.startswith("Genome") or genome_version.startswith("-"): continue - description = fields[1].strip() + description = fields[1].strip() + ' : ' + genome_version data_table_entries.append(dict(value=genome_version, name=description)) data_manager_dict['data_tables']['snpeff_databases'] = data_table_entries except Exception, e:
--- a/data_manager/data_manager_snpEff_databases.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/data_manager/data_manager_snpEff_databases.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,10 +1,10 @@ -<tool id="data_manager_snpeff_databases" name="SnpEff Databases" version="3.2" tool_type="manage_data"> +<tool id="data_manager_snpeff_databases" name="SnpEff Databases" version="3.3" tool_type="manage_data"> <description>Read the list of available snpEff databases</description> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command interpreter="python"> - data_manager_snpEff_databases.py --jar_path \$JAVA_JAR_PATH/snpEff.jar "$out_file" + data_manager_snpEff_databases.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar "$out_file" </command> <inputs> </inputs>
--- a/data_manager/data_manager_snpEff_download.py Thu Jul 04 09:49:15 2013 -0500 +++ b/data_manager/data_manager_snpEff_download.py Fri Oct 04 09:09:29 2013 -0500 @@ -22,7 +22,7 @@ # Download human database 'hg19' java -jar snpEff.jar download -v hg19 - <command>java -jar \$JAVA_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command> + <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config $genomeVersion > $logfile </command> snpEffectPredictor.bin regulation_HeLa-S3.bin @@ -65,6 +65,8 @@ sys.exit( return_code ) ## search data_dir/genome_version for files regulation_pattern = 'regulation_(.+).bin' + # annotation files that are included in snpEff by a flag + annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} genome_path = os.path.join(data_dir,genome_version) if os.path.isdir(genome_path): for root, dirs, files in os.walk(genome_path): @@ -78,8 +80,13 @@ m = re.match(regulation_pattern,fname) if m: name = m.groups()[0] - data_table_entry = dict(value=genome_version, name=name) + data_table_entry = dict(genome=genome_version,value=name, name=name) _add_data_table_entry( data_manager_dict, 'snpeff_regulationdb', data_table_entry ) + elif fname in annotations_dict: + value = annotations_dict[fname] + name = value.lstrip('-') + data_table_entry = dict(genome=genome_version,value=value, name=name) + _add_data_table_entry( data_manager_dict, 'snpeff_annotations', data_table_entry ) return data_manager_dict def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ):
--- a/data_manager/data_manager_snpEff_download.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/data_manager/data_manager_snpEff_download.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,10 +1,10 @@ -<tool id="data_manager_snpeff_download" name="SnpEff Download" version="3.2" tool_type="manage_data"> +<tool id="data_manager_snpeff_download" name="SnpEff Download" version="3.3" tool_type="manage_data"> <description>Download a new database</description> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command interpreter="python"> - data_manager_snpEff_download.py --jar_path \$JAVA_JAR_PATH/snpEff.jar --config \$JAVA_JAR_PATH/snpEff.config + data_manager_snpEff_download.py --jar_path \$SNPEFF_JAR_PATH/snpEff.jar --config \$SNPEFF_JAR_PATH/snpEff.config --genome_version "${genome_databases.fields.value}" --organism "${genome_databases.fields.name}" "$out_file"
--- a/data_manager_conf.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/data_manager_conf.xml Fri Oct 04 09:09:29 2013 -0500 @@ -4,8 +4,7 @@ <data_table name="snpeff_databases"> <!-- Defines a Data Table to be modified. --> <output> <!-- Handle the output of the Data Manager Tool --> <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="name" output_ref="out_file" > <!-- The value of this column will be modified based upon data in "out_file". example value "phiX.fa" --> - </column> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> </output> </data_table> </data_manager> @@ -13,15 +12,28 @@ <data_table name="snpeff_genomedb"> <!-- Defines a Data Table to be modified. --> <output> <!-- Handle the output of the Data Manager Tool --> <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="name" output_ref="out_file" > <!-- The value of this column will be modified based upon data in "out_file". example value "phiX.fa" --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" output_ref="out_file" > + <move type="directory" relativize_symlinks="True"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">snpEff/data</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/data</value_translation> + <value_translation type="function">abspath</value_translation> </column> </output> </data_table> <data_table name="snpeff_regulationdb"> <!-- Defines a Data Table to be modified. 
--> <output> <!-- Handle the output of the Data Manager Tool --> + <column name="genome" /> <!-- columns that are going to be specified by the Data Manager Tool --> <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> - <column name="name" output_ref="out_file" > <!-- The value of this column will be modified based upon data in "out_file". example value "phiX.fa" --> - </column> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + </output> + </data_table> + <data_table name="snpeff_annotations"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="genome" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> </output> </data_table> </data_manager>
--- a/snpEff.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/snpEff.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,4 +1,4 @@ -<tool id="snpEff" name="SnpEff" version="3.2"> +<tool id="snpEff" name="SnpEff" version="3.3"> <description>Variant effect and annotation</description> <!-- You will need to change the path to wherever your installation is. @@ -61,30 +61,34 @@ --> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command> -SNPEFF_DATA_DIR=`grep '^data_dir' \$JAVA_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`; +SNPEFF_DATA_DIR=`grep '^data_dir' \$SNPEFF_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`; eval "if [ ! -e \$SNPEFF_DATA_DIR/$genomeVersion ] ; -then java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion ; +then java -Xmx6G -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$SNPEFF_JAR_PATH/snpEff.config $genomeVersion ; fi"; -java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar eff -c \$JAVA_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength +java -Xmx6G -jar \$SNPEFF_JAR_PATH/snpEff.jar eff -c \$SNPEFF_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength #if $spliceSiteSize and $spliceSiteSize.__str__ != '': -spliceSiteSize $spliceSiteSize #end if #if $filterIn and $filterIn.__str__ != 'no_filter': - -$filterIn + $filterIn #end if #if $filterHomHet and $filterHomHet.__str__ != 'no_filter': - -$filterHomHet + $filterHomHet #end if #if $annotations and $annotations.__str__ != '': - -#slurp - #echo ' -'.join($annotations.__str__.split(',')) + #echo ' '.join($annotations.__str__.split(',')) +#end if +#if $extra_annotations and $extra_annotations.__str__ != '': + #echo ' '.join($extra_annotations.__str__.split(',')) +#end if +#if $regulation and $regulation.__str__ != '': + -reg #echo ' -reg '.join($regulation.__str__.split(','))# #end if #if 
$filterOut and $filterOut.__str__ != '': - -#slurp - #echo ' -'.join($filterOut.__str__.split(',')) + #echo ' '.join($filterOut.__str__.split(',')) #end if #if str( $transcripts ) != 'None': -onlyTr $transcripts @@ -96,7 +100,7 @@ -stats $statsFile #end if #if $offset.__str__ != '': - -${offset} + ${offset} #end if #if $chr.__str__.strip() != '': -chr "$chr" @@ -150,35 +154,43 @@ <param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes"> <option value="no_filter" selected="true">No filter (analyze everything)</option> - <option value="hom">Analyze homozygous sequence changes only </option> - <option value="het">Analyze heterozygous sequence changes only </option> + <option value="-hom">Analyze homozygous sequence changes only </option> + <option value="-het">Analyze heterozygous sequence changes only </option> </param> <!-- The tool testing code can not handle select,radio,check boxes values that start with '-', so the '-' is added in the command generation --> <param name="filterIn" type="select" display="radio" label="Filter sequence changes"> <option value="no_filter" selected="true">No filter (analyze everything)</option> - <option value="del">Analyze deletions only </option> - <option value="ins">Analyze insertions only </option> - <option value="mnp">Only MNPs (multiple nucleotide polymorphisms) </option> - <option value="snp">Only SNPs (single nucleotide polymorphisms) </option> + <option value="-del">Analyze deletions only </option> + <option value="-ins">Analyze insertions only </option> + <option value="-mnp">Only MNPs (multiple nucleotide polymorphisms) </option> + <option value="-snp">Only SNPs (single nucleotide polymorphisms) </option> </param> <param name="annotations" type="select" display="checkboxes" multiple="true" label="Annotation options"> - <option value="cancer">Perform 'cancer' comparissons (Somatic vs Germline). 
Default: false</option> - <option value="canon">Only use canonical transcripts.</option> - <option value="geneId">Use gene ID instead of gene name (VCF output). Default: false</option> - <option value="hgvs">Use HGVS annotations for amino acid sub-field. Default: false</option> - <option value="lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.</option> - <option value="oicr">Add OICR tag in VCF file. Default: false</option> - <option value="onlyReg">Only use regulation tracks.</option> - <option value="sequenceOntolgy">Use Sequence Ontolgy terms. Default: false</option> + <option value="-cancer">Perform 'cancer' comparissons (Somatic vs Germline). Default: false</option> + <option value="-canon">Only use canonical transcripts.</option> + <option value="-geneId">Use gene ID instead of gene name (VCF output). Default: false</option> + <option value="-hgvs">Use HGVS annotations for amino acid sub-field. Default: false</option> + <option value="-lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.</option> + <option value="-oicr">Add OICR tag in VCF file. Default: false</option> + <option value="-onlyReg">Only use regulation tracks.</option> + <option value="-sequenceOntolgy">Use Sequence Ontolgy terms. 
Default: false</option> + </param> + + <param name="extra_annotations" type="select" display="checkboxes" multiple="true" label="Additional Annotations"> + <help>These are available for only a few genomes</help> + <!--GENOME REG_NAME --> + <options from_data_table="snpeff_annotations"> + <filter type="param_value" ref="genomeVersion" key="genome" column="0" /> + </options> </param> <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory Annotation"> <help>These are available for only a few genomes</help> <!--GENOME REG_NAME --> - <options from_data_table="snpeffect_regulationdb"> - <filter type="param_value" ref="genomeVersion" key="name" column="1" /> + <options from_data_table="snpeff_regulationdb"> + <filter type="param_value" ref="genomeVersion" key="genome" column="0" /> </options> </param> @@ -186,21 +198,21 @@ <param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file. Format: One transcript ID per line."/> <param name="filterOut" type="select" display="checkboxes" multiple="true" label="Filter output"> - <option value="no-downstream">Do not show DOWNSTREAM changes </option> - <option value="no-intergenic">Do not show INTERGENIC changes </option> - <option value="no-intron">Do not show INTRON changes </option> - <option value="no-upstream">Do not show UPSTREAM changes </option> - <option value="no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option> + <option value="-no-downstream">Do not show DOWNSTREAM changes </option> + <option value="-no-intergenic">Do not show INTERGENIC changes </option> + <option value="-no-intron">Do not show INTRON changes </option> + <option value="-no-upstream">Do not show UPSTREAM changes </option> + <option value="-no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option> </param> <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position"> <option value="" 
selected="true">Use default (based on input type)</option> - <option value="0">Force zero-based positions (both input and output)</option> - <option value="1">Force one-based positions (both input and output)</option> + <option value="-0">Force zero-based positions (both input and output)</option> + <option value="-1">Force one-based positions (both input and output)</option> </param> <param name="chr" type="text" optionl="true" label="Text to prepend to chromosome name" help="By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'. You can prepend any string you want to the chromosome name."> <validator type="regex" message="No whitespace allows">^\S*$</validator> - + </param> <param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/> <param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Do not report usage statistics to server"/> @@ -233,9 +245,7 @@ <param name="filterHomHet" value="no_filter"/> <param name="filterIn" value="no_filter"/> <param name="generate_stats" value="False"/> - <!-- - <param name="filterOut" value="no-upstream"/> - --> + <param name="filterOut" value="+-no-upstream"/> <output name="snpeff_output"> <assert_contents> <!-- Check that an effect was added --> @@ -258,7 +268,7 @@ <param name="outputFormat" value="vcf"/> <param name="genomeVersion" value="testCase"/> <param name="udLength" value="0"/> - <param name="filterHomHet" value="het"/> + <param name="filterHomHet" value="+-het"/> <param name="filterIn" value="no_filter"/> <!-- <param name="filterOut" value=""/> @@ -300,7 +310,7 @@ <param name="udLength" value="0"/> <param name="filterHomHet" value="no_filter"/> <param name="filterIn" value="no_filter"/> - <param name="filterOut" value="no-upstream"/> + <param name="filterOut" value="+-no-upstream"/> <param name="generate_stats" value="False"/> <output name="snpeff_output"> <assert_contents>
--- a/snpSift_annotate.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/snpSift_annotate.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,14 +1,14 @@ -<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.2"> +<tool id="snpSift_annotate" name="SnpSift Annotate" version="3.3"> <description>Annotate SNPs from dbSnp</description> <!-- You will need to change the path to wherever your installation is. You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) --> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command> - java -Xmx6G -jar \$JAVA_JAR_PATH/SnpSift.jar $annotate_cmd + java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar $annotate_cmd #if $annotate.id : -id #elif $annotate.info_ids.__str__.strip() != '' :
--- a/snpSift_caseControl.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/snpSift_caseControl.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,14 +1,14 @@ -<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.2"> +<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.3"> <description>Count samples are in 'case' and 'control' groups.</description> <!-- You will need to change the path to wherever your installation is. You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) --> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command> - java -Xmx1G -jar \$JAVA_JAR_PATH/SnpSift.jar caseControl -q + java -Xmx1G -jar \$SNPEFF_JAR_PATH/SnpSift.jar caseControl -q #if $name.__str__.strip() != '': -name $name #end if
--- a/snpSift_filter.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/snpSift_filter.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,11 +1,11 @@ -<tool id="snpSift_filter" name="SnpSift Filter" version="3.2"> +<tool id="snpSift_filter" name="SnpSift Filter" version="3.3"> <options sanitize="False" /> <description>Filter variants using arbitrary expressions</description> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command> - java -Xmx6G -jar \$JAVA_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse $pass + java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar filter -f $input -e $exprFile $inverse $pass #if $filterId and len($filterId.__str__.strip()) > 0: --filterId = "$filterId" #end if
--- a/snpSift_int.xml Thu Jul 04 09:49:15 2013 -0500 +++ b/snpSift_int.xml Fri Oct 04 09:09:29 2013 -0500 @@ -1,14 +1,14 @@ -<tool id="snpSift_int" name="SnpSift Intervals" version="3.2"> +<tool id="snpSift_int" name="SnpSift Intervals" version="3.3"> <description>Filter variants using intervals </description> <!-- You will need to change the path to wherever your installation is. You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) --> <requirements> - <requirement type="package" version="3.2">snpEff</requirement> + <requirement type="package" version="3.3">snpEff</requirement> </requirements> <command> - java -Xmx2G -jar \$JAVA_JAR_PATH/SnpSift.jar intervals -i $input $exclude $bedFile > $output + java -Xmx2G -jar \$SNPEFF_JAR_PATH/SnpSift.jar intervals -i $input $exclude $bedFile > $output </command> <inputs> <param format="vcf" name="input" type="data" label="VCF input"/>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeff_annotations.loc.sample Fri Oct 04 09:09:29 2013 -0500 @@ -0,0 +1,5 @@ +## Annotation Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +#genome annotation_name description +#GRCh37.71 nextprot nextprot +#GRCh37.71 motif motif
--- a/tool-data/snpeff_regulationdb.loc.sample Thu Jul 04 09:49:15 2013 -0500 +++ b/tool-data/snpeff_regulationdb.loc.sample Fri Oct 04 09:09:29 2013 -0500 @@ -1,5 +1,4 @@ -## Databases for SnpEff +## Regulation Databases for SnpEff ## These are from the list on: http://snpeff.sourceforge.net/download.html -## the Description field in this sample is "Genome : Version" -#Genome Regulation_Name -#GRCh37.70 CD4 +#genome regulation_name description +#GRCh37.70 CD4 CD4