# HG changeset patch # User devteam # Date 1447088330 18000 # Node ID 525e6995fe4470a74485552c413c9a118b03ce85 planemo upload for repository Nonehttps://github.com/galaxyproject/tools-devteam/tree/master/tools/table_annovar commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b diff -r 000000000000 -r 525e6995fe44 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Nov 09 11:58:50 2015 -0500 @@ -0,0 +1,24 @@ +ANNOVAR needs to be installed manually in the following way: + +1a) If you already have ANNOVAR installed on your system, simply edit the tool-data/annovar.loc file to reflect locations of + the perl scripts (annotate_variation.pl and convert2annovar.pl) and humandb directory (directory containing the annovar database files) + +1b) If you do not have ANNOVAR installed, request annovar download and sign license here: + http://www.openbioinformatics.org/annovar/annovar_download_form.php + + i) Once downloaded, install annovar per the installation instructions and note the installation path. + + ii) Then download all desired databases for all desired builds as follows: + annotate_variation.pl -downdb -buildver <build> [-webfrom annovar] <database> <humandb> + + where <humandb> is the location where all database files should be stored + and <database> is the database file to download, e.g. refGene (see bottom of document for all available database files at the time of writing this tool) + and <build> can be hg18 or hg19 for humans; other organisms are also available. 
+ + list of all available databases can be found here: http://www.openbioinformatics.org/annovar/annovar_db.html + + iii) edit the tool-data/annovar.loc file to reflect location of humandb folder + +2) add the annovar scripts convert2annovar.pl and table_annovar.pl to your Galaxy user's path + +3) restart galaxy instance for changes in .loc file to take effect \ No newline at end of file diff -r 000000000000 -r 525e6995fe44 replace_NA.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_NA.py Mon Nov 09 11:58:50 2015 -0500 @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +# Reads a tab-separated table from stdin and prints it to stdout, replacing the target string (currently 'NA') with a number (currently -1) in columns that have numerical values. +# Limitations: (a) can only take input from stdin, (b) cannot specify target or replacement, and (c) NOTE(review): Python 2 only, since it relies on the print statement and on range() returning a list. + +import sys +import os +import tempfile + +# Constants. +SEPARATOR = '\t' +TARGET = 'NA' +REPLACEMENT = -1 +# List of known numerical columns; these header names are always typed as number, whatever their values look like. +NUMERICAL_COLUMNS = ['1000g2012apr_all', 'esp6500si_all'] + +# Spool the input to a temp file: column types are only known once all of stdin has been read, so the data is replayed in a second pass. +temp_out = tempfile.NamedTemporaryFile(delete=False) + +# Use first line to set up data structure and identify numerical columns by header name. +first_line = sys.stdin.readline() +fields = first_line.strip().split(SEPARATOR) +numerical_cols = [] +for i, f in enumerate(fields): + if f in NUMERICAL_COLUMNS: + numerical_cols.append(i) + +# One 2-element list per first-line field: index 0 counts string values and index 1 counts number values. NOTE(review): sized from the first line, so a later row with more fields would index past the end. +col_type_counts = [ [0, 0] for i in range( len(fields) ) ] + +# Set up function to process lines. +def process_line_fields(fields): + ''' + Tally, per column index, whether each field of one line parses as a float (number) or not (string), updating the module-level col_type_counts. + ''' + for i, f in enumerate(fields): + # Ignore targets in calculation; they are the placeholders to be replaced, not evidence of a type. + if f == TARGET: + continue + + # Assume it's a number. + type_index = 1 + try: + float(f) + except: + # Not a number. NOTE(review): the bare except catches every exception, not only the ValueError float() raises for non-numeric text. + type_index = 0 + col_type_counts[i][type_index] += 1 + + +# Process first line. 
+process_line_fields(fields) +temp_out.write(first_line) + +# Process the remaining lines, tallying types and spooling each line unchanged. +for line in sys.stdin: + fields = line.strip().split(SEPARATOR) + process_line_fields(fields) + temp_out.write(line) + +# Close temp file so that it can be read. +temp_name = temp_out.name +temp_out.close() + +# Get column type based on label or consensus: columns named in NUMERICAL_COLUMNS are always number; otherwise string only when string values outnumber number values, so ties and all-target columns count as number. +col_types = range(len(col_type_counts)) +for i, counts in enumerate(col_type_counts): + if i in numerical_cols: + col_type = 'number' + elif counts[0] > counts[1]: + col_type = 'string' + else: + col_type = 'number' + col_types[i] = col_type + +# Replace target in number columns while replaying the spooled lines to stdout. +for line in open(temp_name, 'r'): + fields = line.strip().split(SEPARATOR) + for i, f in enumerate(fields): + if fields[i] == TARGET and col_types[i] == 'number': + fields[i] = str(REPLACEMENT) + print SEPARATOR.join(fields) + +# Clean up temp file. NOTE(review): temp_out was already closed above, so this second close() is redundant; the unlink is what removes the file. +temp_out.close() +os.unlink(temp_out.name) + + + diff -r 000000000000 -r 525e6995fe44 table_annovar.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/table_annovar.xml Mon Nov 09 11:58:50 2015 -0500 @@ -0,0 +1,126 @@ + + with functional information using ANNOVAR + + + annovar + SCRIPT_PATH + + + + ## Convert VCF to AV input format. + #if str($out_format) == "tabular": + convert2annovar.pl -format vcf4 -includeinfo ${input} > input.avinput ; + #set tab_anno_input = "input.avinput" + #else: + #set tab_anno_input = $input + #end if + + ## Variant annotation; make sure to include entry in indexes table for build database. + + #set protocol = [] + #set operation = [] + + ## Add gene annotations. + #if $gene_anns: + #silent protocol.append( str( $gene_anns ) ) + #silent operation.append( ','.join( ['g' for t in range( str($gene_anns).count(',') + 1 )] ) ) + #end if + + ## Add regions. + #if $regions: + #silent protocol.append( str( $regions ) ) + #silent operation.append( ','.join( ['r' for t in range( str($regions).count(',') + 1 )] ) ) + #end if + + ## Add filters. 
+ #if $filters: + #silent protocol.append( str( $filters ) ) + #silent operation.append( ','.join( ['f' for t in range( str($filters).count(',') + 1 )] ) ) + #end if + + #set protocol = ','.join( $protocol ) + #set operation = ','.join( $operation ) + + ## Annotate variants. + table_annovar.pl ${tab_anno_input} ${__get_data_table_entry__('annovar_indexes', 'dbkey', $input.dbkey, 'path')} -protocol ${protocol} -operation ${operation} -nastring '.' -buildver ${input.dbkey} --outfile output + + ## Add option to consume/produce VCF. + #if str($out_format) == "vcf": + --vcfinput + #end if + + ## Post-processing: process annotated table to remove "NA" strings from numerical columns if + ## tabular. Copy to output. + #if str($out_format) == "tabular": + ; cat output.${input.dbkey}_multianno.txt | python \${SCRIPT_PATH}/replace_NA.py > ${output} + #else: + ; cp output.${input.dbkey}_multianno.vcf ${output} + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool will annotate variants using specified gene annotations, regions, and filtering databases. Input is a VCF dataset, and output is a table of annotations for each variant in the +VCF dataset or a VCF dataset with the annotations in INFO fields. + +**ANNOVAR Website and Documentation** + +Website: http://www.openbioinformatics.org/annovar/ + +Paper: http://nar.oxfordjournals.org/content/38/16/e164 + +**Important Usage Note** + +ANNOVAR is open-source and free for non-profit use. If you use it for commercial purposes, please contact BIOBASE (info@biobase-international.com) directly for license related issues. 
Also see http://www.openbioinformatics.org/annovar/annovar_faq.html#license + + + diff -r 000000000000 -r 525e6995fe44 tool-data/annovar_index.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/annovar_index.loc.sample Mon Nov 09 11:58:50 2015 -0500 @@ -0,0 +1,20 @@ +# +# Database name (value), dbkey, type, and path. +# +# Sample entries for gene-based annotations: +#refGene hg19 gene_ann /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#wgEncodeGencodeCompV14 hg19 gene_ann /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +# +# Sample entries for region-based annotations: +# +#genomicSuperDups hg19 region /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#phastConsElements46way hg19 region /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +# +# Sample entries for filter-based annotations: +# +#1000g2012apr_all hg19 filter /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#avsift hg19 filter /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#snp137NonFlagged hg19 filter /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#esp6500si_all hg19 filter /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#snp137 hg19 filter /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ +#cosmic64 hg19 filter /aut/bx/jgoecks/galaxy/data/Homo_sapiens/UCSC/hg19/AnnovarIndices/ \ No newline at end of file diff -r 000000000000 -r 525e6995fe44 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Nov 09 11:58:50 2015 -0500 @@ -0,0 +1,7 @@ + + + + value, dbkey, type, path + +
+
\ No newline at end of file diff -r 000000000000 -r 525e6995fe44 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Nov 09 11:58:50 2015 -0500 @@ -0,0 +1,6 @@ + + + + $REPOSITORY_INSTALL_DIR + + \ No newline at end of file