Mercurial > repos > jjohnson > snpeff_with_dep
diff snpEff.xml @ 24:f22f28cd881e
Update for snpEff v3.2
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 07 May 2013 14:58:15 -0500 |
parents | 50c1a8ff6c82 |
children |
line wrap: on
line diff
--- a/snpEff.xml Thu Jan 17 14:25:58 2013 -0600 +++ b/snpEff.xml Tue May 07 14:58:15 2013 -0500 @@ -1,37 +1,127 @@ -<tool id="snpEff" name="SnpEff" version="3.1"> +<tool id="snpEff" name="SnpEff" version="3.2"> <description>Variant effect and annotation</description> <!-- You will need to change the path to wherever your installation is. You can change the amount of memory used by snpEff, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) <command>java -Xmx6G -jar /path/to/your/snpEff/snpEff.jar eff -c /path/to/your/snpEff/snpEff/snpEff.config $inputFormat $offset -upDownStreamLen $udLength $filterIn $filterHomHet -no $filterOut -stats $statsFile $genomeVersion $input > $output </command> +Options: + -a , -around : Show N codons and amino acids around change (only in coding regions). Default is 0 codons. + -i <format> : Input format [ vcf, txt, pileup, bed ]. Default: VCF. + -o <format> : Ouput format [ txt, vcf, gatk, bed, bedAnn ]. Default: VCF. + -interval : Use a custom interval file (you may use this option many times) + -chr <string> : Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output. + -s, -stats : Name of stats file (summary). Default is 'snpEff_summary.html' + -t : Use multiple threads (implies '-noStats'). 
Default 'off' + +Sequence change filter options: + -del : Analyze deletions only + -ins : Analyze insertions only + -hom : Analyze homozygous variants only + -het : Analyze heterozygous variants only + -minQ X, -minQuality X : Filter out variants with quality lower than X + -maxQ X, -maxQuality X : Filter out variants with quality higher than X + -minC X, -minCoverage X : Filter out variants with coverage lower than X + -maxC X, -maxCoverage X : Filter out variants with coverage higher than X + -nmp : Only MNPs (multiple nucleotide polymorphisms) + -snp : Only SNPs (single nucleotide polymorphisms) + +Results filter options: + -fi <bedFile> : Only analyze changes that intersect with the intervals specified in this file (you may use this option many times) + -no-downstream : Do not show DOWNSTREAM changes + -no-intergenic : Do not show INTERGENIC changes + -no-intron : Do not show INTRON changes + -no-upstream : Do not show UPSTREAM changes + -no-utr : Do not show 5_PRIME_UTR or 3_PRIME_UTR changes + +Annotations options: + -cancer : Perform 'cancer' comparissons (Somatic vs Germline). Default: false + -canon : Only use canonical transcripts. + -geneId : Use gene ID instead of gene name (VCF output). Default: false + -hgvs : Use HGVS annotations for amino acid sub-field. Default: false + -lof : Add loss of function (LOF) and Nonsense mediated decay (NMD) tags. + -reg <name> : Regulation track to use (this option can be used add several times). + -oicr : Add OICR tag in VCF file. Default: false + -onlyReg : Only use regulation tracks. + -onlyTr <file.txt> : Only use the transcripts in this file. Format: One transcript ID per line. + -sequenceOntolgy : Use Sequence Ontolgy terms. Default: false + -ss, -spliceSiteSize <int> : Set size for splice sites (donor and acceptor) in bases. 
Default: 2 + -ud, -upDownStreamLen <int> : Set upstream downstream interval length (in bases) + +Generic options: + -0 : File positions are zero-based (same as '-inOffset 0 -outOffset 0') + -1 : File positions are one-based (same as '-inOffset 1 -outOffset 1') + -c , -config : Specify config file + -h , -help : Show this help and exit + -if, -inOffset : Offset input by a number of bases. E.g. '-inOffset 1' for one-based input files + -of, -outOffset : Offset output by a number of bases. E.g. '-outOffset 1' for one-based output files + -noLog : Do not report usage statistics to server + -noStats : Do not create stats (summary) file + -q , -quiet : Quiet mode (do not show any messages or errors) + -v , -verbose : Verbose mode + --> <requirements> - <requirement type="package" version="3.1">snpEff</requirement> + <requirement type="package" version="3.2">snpEff</requirement> </requirements> <command> -export SNPEFF_DATA_DIR=`grep '^data_dir' \$JAVA_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`; -if [ ! -e \$SNPEFF_DATA_DIR/$genomeVersion ] ; +SNPEFF_DATA_DIR=`grep '^data_dir' \$JAVA_JAR_PATH/snpEff.config | sed 's/.*data_dir.*[=:]//'`; +eval "if [ ! 
-e \$SNPEFF_DATA_DIR/$genomeVersion ] ; then java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion ; -fi; -java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar eff -c \$JAVA_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength $filterIn $filterHomHet -no $filterOut -stats $statsFile $genomeVersion $input > $output </command> +fi"; +java -Xmx6G -jar \$JAVA_JAR_PATH/snpEff.jar eff -c \$JAVA_JAR_PATH/snpEff.config -i $inputFormat -o $outputFormat -upDownStreamLen $udLength +#if $spliceSiteSize and $spliceSiteSize.__str__ != '': + -spliceSiteSize $spliceSiteSize +#end if +#if $filterIn and $filterIn.__str__ != 'no_filter': + -$filterIn +#end if +#if $filterHomHet and $filterHomHet.__str__ != 'no_filter': + -$filterHomHet +#end if +#if $annotations and $annotations.__str__ != '': + -#slurp + #echo ' -'.join($annotations.__str__.split(',')) +#end if +#if $filterOut and $filterOut.__str__ != '': + -#slurp + #echo ' -'.join($filterOut.__str__.split(',')) +#end if +#if str( $transcripts ) != 'None': + -onlyTr $transcripts +#end if +#if str( $intervals ) != 'None': ### fix this for multiple dataset input + -interval $intervals +#end if +#if $statsFile: + -stats $statsFile +#end if +#if $offset.__str__ != '': + -${offset} +#end if +#if $chr.__str__.strip() != '': + -chr "$chr" +#end if + $noLog $genomeVersion $input > $snpeff_output +</command> <inputs> <param format="vcf,tabular,pileup,bed" name="input" type="data" label="Sequence changes (SNPs, MNPs, InDels)"/> <param name="inputFormat" type="select" label="Input format"> - <option value="vcf">VCF</option> - <option value="txt">Tabular</option> - <option value="pileup">Pileup</option> - <option value="bed">BED</option> + <option value="vcf" selected="true">VCF</option> + <option value="txt">Tabular (Deprecated)</option> + <option value="pileup">Pileup (Deprecated)</option> + <option value="bed">BED (Deprecated)</option> </param> <param name="outputFormat" 
type="select" label="Output format"> + <option value="vcf" selected="true">VCF (only if input is VCF)</option> <option value="txt">Tabular</option> - <option value="vcf">VCF (only if input is VCF)</option> <option value="bed">BED</option> <option value="bedAnn">BED Annotations</option> </param> <param name="genomeVersion" type="select" label="Genome"> + <!--GENOME DESCRIPTION--> <options from_file="snpeffect_genomedb.loc"> <column name="name" index="1"/> <column name="value" index="0"/> @@ -49,50 +139,183 @@ <option value="20000">20000 bases</option> </param> - <param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes"> - <option value="">No filter (analyze everything)</option> - <option value="-hom">Analyze homozygous sequence changes only </option> - <option value="-het">Analyze heterozygous sequence changes only </option> - </param> + <param name="spliceSiteSize" type="select" optional="true" label="Set size for splice sites (donor and acceptor) in bases. 
Default: 2"> + <option value="1">1 base</option> + <option value="2">2 bases</option> + <option value="3">3 bases</option> + <option value="4">4 bases</option> + <option value="5">5 bases</option> + <option value="6">6 bases</option> + <option value="7">7 bases</option> + <option value="8">8 bases</option> + <option value="9">9 bases</option> + </param> - <param name="filterIn" type="select" display="radio" label="Filter sequence changes"> - <option value="">No filter (analyze everything)</option> - <option value="-del">Analyze deletions only </option> - <option value="-ins">Analyze insertions only </option> - <option value="-nmp">Only MNPs (multiple nucleotide polymorphisms) </option> - <option value="-snp">Only SNPs (single nucleotide polymorphisms) </option> - </param> + <param name="filterHomHet" type="select" display="radio" label="Filter homozygous / heterozygous changes"> + <option value="no_filter" selected="true">No filter (analyze everything)</option> + <option value="hom">Analyze homozygous sequence changes only </option> + <option value="het">Analyze heterozygous sequence changes only </option> + </param> + + <!-- The tool testing code can not handle select,radio,checkbox values that start with '-', so the '-' is added in the command generation --> + <param name="filterIn" type="select" display="radio" label="Filter sequence changes"> + <option value="no_filter" selected="true">No filter (analyze everything)</option> + <option value="del">Analyze deletions only </option> + <option value="ins">Analyze insertions only </option> + <option value="mnp">Only MNPs (multiple nucleotide polymorphisms) </option> + <option value="snp">Only SNPs (single nucleotide polymorphisms) </option> + </param> - <param name="filterOut" type="select" display="checkboxes" multiple="true" optional="false" value="None" label="Filter output"> - <option value="None" selected="true">None</option> - <option value="downstream">Do not show DOWNSTREAM changes </option> - <option 
value="intergenic">Do not show INTERGENIC changes </option> - <option value="intron">Do not show INTRON changes </option> - <option value="upstream">Do not show UPSTREAM changes </option> - <option value="utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option> - </param> + <param name="annotations" type="select" display="checkboxes" multiple="true" optional="true" label="Annotation options"> + <option value="cancer">Perform 'cancer' comparissons (Somatic vs Germline). Default: false</option> + <option value="canon">Only use canonical transcripts.</option> + <option value="geneId">Use gene ID instead of gene name (VCF output). Default: false</option> + <option value="hgvs">Use HGVS annotations for amino acid sub-field. Default: false</option> + <option value="lof">Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.</option> + <option value="oicr">Add OICR tag in VCF file. Default: false</option> + <option value="onlyReg">Only use regulation tracks.</option> + <option value="sequenceOntolgy">Use Sequence Ontolgy terms. 
Default: false</option> + </param> + + <param name="regulation" type="select" display="checkboxes" multiple="true" optional="true" label="Non-coding and regulatory Annotation"> + <help>These are available for only a few genomes</help> + <!--GENOME REG_NAME --> + <options from_file="snpeffect_regulationdb.loc"> + <column name="name" index="1"/> + <column name="value" index="0"/> + <filter type="param_value" ref="genomeVersion" key="name" column="1" /> + </options> + </param> - <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position"> - <option value="">Use default (based on input type)</option> - <option value="-0">Force zero-based positions (both input and output)</option> - <option value="-1">Force one-based positions (both input and output)</option> + <param name="intervals" format="bed" type="data" optional="true" label="Use custom interval file for annotation"/> + <param name="transcripts" format="tabular" type="data" optional="true" label="Only use the transcripts in this file. 
Format: One transcript ID per line."/> + + <param name="filterOut" type="select" display="checkboxes" multiple="true" optional="true" label="Filter output"> + <option value="no-downstream">Do not show DOWNSTREAM changes </option> + <option value="no-intergenic">Do not show INTERGENIC changes </option> + <option value="no-intron">Do not show INTRON changes </option> + <option value="no-upstream">Do not show UPSTREAM changes </option> + <option value="no-utr">Do not show 5_PRIME_UTR or 3_PRIME_UTR changes </option> + </param> + + <param name="offset" type="select" display="radio" optional="true" label="Chromosomal position"> + <option value="" selected="true">Use default (based on input type)</option> + <option value="0">Force zero-based positions (both input and output)</option> + <option value="1">Force one-based positions (both input and output)</option> </param> + <param name="chr" type="text" optionl="true" label="Text to prepend to chromosome name" help="By default SnpEff simplifies all chromosome names. For instance 'chr1' is just '1'. 
You can prepend any string you want to the chromosome name."> + <validator type="regex" message="No whitespace allows">^\S*$</validator> + + </param> + <param name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats"/> + <param name="noLog" type="boolean" truevalue="-noLog" falsevalue="" checked="true" label="Do not report usage statistics to server"/> </inputs> <outputs> - <data format="tabular" name="output" > + <data format="vcf" name="snpeff_output" > <change_format> + <when input="outputFormat" value="vcf" format="vcf" /> <when input="outputFormat" value="txt" format="tabular" /> - <when input="outputFormat" value="vcf" format="vcf" /> <when input="outputFormat" value="bed" format="bed" /> <when input="outputFormat" value="bedAnn" format="bed" /> </change_format> </data> - <data format="html" name="statsFile" /> + + <data format="html" name="statsFile"> + <filter>generate_stats == True</filter> + </data> </outputs> <stdio> - <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> <exit_code range="1:" level="fatal" description="Error" /> + <exit_code range="-1" level="fatal" description="Error: Cannot open file" /> </stdio> + <tests> + <test> + <param name="input" ftype="vcf" value="vcf_homhet.vcf"/> + <param name="inputFormat" value="vcf"/> + <param name="outputFormat" value="vcf"/> + <param name="genomeVersion" value="testCase"/> + <param name="udLength" value="0"/> + <param name="filterHomHet" value="no_filter"/> + <param name="filterIn" value="no_filter"/> + <param name="generate_stats" value="False"/> + <!-- + <param name="filterOut" value="no-upstream"/> + --> + <output name="snpeff_output"> + <assert_contents> + <!-- Check that an effect was added --> + <has_text text="EFF=" /> + </assert_contents> + </output> + <!-- Check for a HTML header indicating that this was successful --> + <!-- + <output name="statsFile"> + <assert_contents> + <has_text text="SnpEff: Variant 
analysis" /> + </assert_contents> + </output> + --> + </test> + + <test> + <param name="input" ftype="vcf" value="vcf_homhet.vcf"/> + <param name="inputFormat" value="vcf"/> + <param name="outputFormat" value="vcf"/> + <param name="genomeVersion" value="testCase"/> + <param name="udLength" value="0"/> + <param name="filterHomHet" value="het"/> + <param name="filterIn" value="no_filter"/> + <!-- + <param name="filterOut" value=""/> + --> + <param name="generate_stats" value="False"/> + <output name="snpeff_output"> + <assert_contents> + <!-- Check that NO effects were added since -het is set --> + <not_has_text text="EFF=NON_SYNONYMOUS_CODING" /> + </assert_contents> + </output> + </test> + <test> + <param name="input" ftype="vcf" value="vcf_homhet.vcf"/> + <param name="inputFormat" value="vcf"/> + <param name="outputFormat" value="vcf"/> + <param name="genomeVersion" value="testCase"/> + <param name="udLength" value="0"/> + <param name="filterHomHet" value="no_filter"/> + <param name="filterIn" value="del"/> + <!-- + <param name="filterOut" value=""/> + --> + <param name="generate_stats" value="False"/> + <output name="snpeff_output"> + <assert_contents> + <!-- Check that deletions were evaluated --> + <has_text_matching expression="Y\t59030478\t.*EFF=INTERGENIC" /> + <!-- Check that insertion on last line was NOT evaluated --> + <has_text_matching expression="Y\t59032947\t.*SF=5\tGT" /> + </assert_contents> + </output> + </test> + <test> + <param name="input" ftype="vcf" value="vcf_homhet.vcf"/> + <param name="inputFormat" value="vcf"/> + <param name="outputFormat" value="vcf"/> + <param name="genomeVersion" value="testCase"/> + <param name="udLength" value="0"/> + <param name="filterHomHet" value="no_filter"/> + <param name="filterIn" value="no_filter"/> + <param name="filterOut" value="no-upstream"/> + <param name="generate_stats" value="False"/> + <output name="snpeff_output"> + <assert_contents> + <!-- Check that NO UPSTREAM effect was added --> + 
<not_has_text text="UPSTREAM" /> + </assert_contents> + </output> + </test> + + </tests> <help> This tool calculates the effect of variants (SNPs/MNPs/Insertions) and deletions.