# HG changeset patch # User saskia-hiltemann # Date 1384789437 18000 # Node ID ed770baa1f165d5ef8a9b6dd9b7510d0325d9653 # Parent 60630d56ab1f50a90565a7c73128792e76855d51 added support for LJB2, COSMIC67, CLINVAR, NCI60 annotations. Fixed DGV annotation, now uses new UCSC file named dgvMerged diff -r 60630d56ab1f -r ed770baa1f16 README --- a/README Wed Sep 04 08:02:09 2013 -0400 +++ b/README Mon Nov 18 10:43:57 2013 -0500 @@ -33,179 +33,210 @@ list of files in my own humandb folder: - hg18_ALL.sites.2012_04.txt - hg18_ALL.sites.2012_04.txt.idx - hg18_avsift.txt - hg18_avsift.txt.idx - hg18_CEU.sites.2010_07.txt - hg18_CEU.sites.2010_07.txt.idx - hg18_cg46.txt - hg18_cg46.txt.idx - hg18_cg69.txt - hg18_cg69.txt.idx - hg18_cytoBand.txt - hg18_dgv.txt - hg18_ensGeneMrna.fa - hg18_ensGene.txt - hg18_esp5400_aa.txt - hg18_esp5400_aa.txt.idx - hg18_esp5400_all.txt - hg18_esp5400_all.txt.idx - hg18_esp5400_ea.txt - hg18_esp5400_ea.txt.idx - hg18_esp6500_aa.txt - hg18_esp6500_aa.txt.idx - hg18_esp6500_all.txt - hg18_esp6500_all.txt.idx - hg18_esp6500_ea.txt - hg18_esp6500_ea.txt.idx - hg18_esp6500si_aa.txt - hg18_esp6500si_aa.txt.idx - hg18_esp6500si_all.txt - hg18_esp6500si_all.txt.idx - hg18_esp6500si_ea.txt - hg18_esp6500si_ea.txt.idx - hg18_example_db_generic.txt - hg18_example_db_gff3.txt - hg18_genomicSuperDups.txt - hg18_gerp++gt2.txt - hg18_gerp++gt2.txt.idx - hg18_gwasCatalog.txt - hg18_JPTCHB.sites.2010_07.txt - hg18_JPTCHB.sites.2010_07.txt.idx - hg18_keggMapDesc.txt - hg18_keggPathway.txt - hg18_kgXref.txt - hg18_knownGeneMrna.fa - hg18_knownGene.txt - hg18_ljb_all.txt - hg18_ljb_all.txt.idx - hg18_ljb_lrt.txt - hg18_ljb_lrt.txt.idx - hg18_ljb_mt.txt - hg18_ljb_mt.txt.idx - hg18_ljb_phylop.txt - hg18_ljb_phylop.txt.idx - hg18_ljb_pp2.txt - hg18_ljb_pp2.txt.idx - hg18_ljb_sift.txt - hg18_ljb_sift.txt.idx - hg18_phastConsElements44way.txt - hg18_refGeneMrna.fa - hg18_refGene.txt - hg18_refLink.txt - hg18_snp128NonFlagged.txt - hg18_snp128NonFlagged.txt.idx - 
hg18_snp128.txt - hg18_snp128.txt.idx - hg18_snp129NonFlagged.txt - hg18_snp129NonFlagged.txt.idx - hg18_snp129.txt - hg18_snp129.txt.idx - hg18_snp130NonFlagged.txt - hg18_snp130NonFlagged.txt.idx - hg18_snp130.txt - hg18_snp130.txt.idx - hg18_snp131NonFlagged.txt - hg18_snp131NonFlagged.txt.idx - hg18_snp131.txt - hg18_snp131.txt.idx - hg18_snp132NonFlagged.txt - hg18_snp132NonFlagged.txt.idx - hg18_snp132.txt - hg18_snp132.txt.idx - hg18_tfbsConsSites.txt - hg18_YRI.sites.2010_07.txt - hg18_YRI.sites.2010_07.txt.idx - hg19_AFR.sites.2012_04.txt - hg19_AFR.sites.2012_04.txt.idx - hg19_ALL.sites.2010_11.txt - hg19_ALL.sites.2010_11.txt.idx - hg19_ALL.sites.2012_02.txt - hg19_ALL.sites.2012_02.txt.idx - hg19_ALL.sites.2012_04.txt - hg19_ALL.sites.2012_04.txt.idx - hg19_AMR.sites.2012_04.txt - hg19_AMR.sites.2012_04.txt.idx - hg19_ASN.sites.2012_04.txt - hg19_ASN.sites.2012_04.txt.idx - hg19_avsift.txt - hg19_avsift.txt.idx - hg19_cg46.txt - hg19_cg46.txt.idx - hg19_cg69.txt - hg19_cg69.txt.idx - hg19_cosmic61.txt - hg19_cosmic61.txt.idx - hg19_cosmic63.txt - hg19_cosmic63.txt.idx - hg19_cosmic64.txt - hg19_cosmic64.txt.idx - hg19_cytoBand.txt - hg19_dgv.txt - hg19_ensGeneMrna.fa - hg19_ensGene.txt - hg19_esp5400_aa.txt - hg19_esp5400_aa.txt.idx - hg19_esp5400_all.txt - hg19_esp5400_all.txt.idx - hg19_esp5400_ea.txt - hg19_esp5400_ea.txt.idx - hg19_esp6500_aa.txt - hg19_esp6500_aa.txt.idx - hg19_esp6500_all.txt - hg19_esp6500_all.txt.idx - hg19_esp6500_ea.txt - hg19_esp6500_ea.txt.idx - hg19_esp6500si_aa.txt - hg19_esp6500si_aa.txt.idx - hg19_esp6500si_all.txt - hg19_esp6500si_all.txt.idx - hg19_esp6500si_ea.txt - hg19_esp6500si_ea.txt.idx - hg19_EUR.sites.2012_04.txt - hg19_EUR.sites.2012_04.txt.idx - hg19_genomicSuperDups.txt - hg19_gerp++gt2.txt - hg19_gerp++gt2.txt.idx - hg19_gwasCatalog.txt - hg19_keggMapDesc.txt - hg19_keggPathway.txt - hg19_kgXref.txt - hg19_knownGeneMrna.fa - hg19_knownGene.txt - hg19_ljb_all.txt - hg19_ljb_all.txt.idx - hg19_ljb_lrt.txt - 
hg19_ljb_lrt.txt.idx - hg19_ljb_mt.txt - hg19_ljb_mt.txt.idx - hg19_ljb_phylop.txt - hg19_ljb_phylop.txt.idx - hg19_ljb_pp2.txt - hg19_ljb_pp2.txt.idx - hg19_ljb_sift.txt - hg19_ljb_sift.txt.idx - hg19_phastConsElements46way.txt - hg19_refGeneMrna.fa - hg19_refGene.txt - hg19_refLink.txt - hg19_snp130NonFlagged.txt - hg19_snp130NonFlagged.txt.idx - hg19_snp130.txt - hg19_snp130.txt.idx - hg19_snp131NonFlagged.txt - hg19_snp131NonFlagged.txt.idx - hg19_snp131.txt - hg19_snp132NonFlagged.txt - hg19_snp132NonFlagged.txt.idx - hg19_snp132.txt - hg19_snp132.txt.idx - hg19_snp135NonFlagged.txt - hg19_snp135NonFlagged.txt.idx - hg19_snp135.txt - hg19_snp137NonFlagged.txt - hg19_snp137NonFlagged.txt.idx - hg19_snp137.txt - hg19_tfbsConsSites.txt +hg18_ALL.sites.2012_04.txt +hg18_ALL.sites.2012_04.txt.idx +hg18_CEU.sites.2010_07.txt +hg18_CEU.sites.2010_07.txt.idx +hg18_JPTCHB.sites.2010_07.txt +hg18_JPTCHB.sites.2010_07.txt.idx +hg18_YRI.sites.2010_07.txt +hg18_YRI.sites.2010_07.txt.idx +hg18_cg46.txt +hg18_cg46.txt.idx +hg18_cg69.txt +hg18_cg69.txt.idx +hg18_cytoBand.txt +hg18_dgvMerged.txt +hg18_ensGene.txt +hg18_ensGeneMrna.fa +hg18_esp5400_aa.txt +hg18_esp5400_aa.txt.idx +hg18_esp5400_all.txt +hg18_esp5400_all.txt.idx +hg18_esp6500_aa.txt +hg18_esp6500_aa.txt.idx +hg18_esp6500_all.txt +hg18_esp6500_all.txt.idx +hg18_esp6500_ea.txt +hg18_esp6500_ea.txt.idx +hg18_esp6500si_aa.txt +hg18_esp6500si_aa.txt.idx +hg18_esp6500si_all.txt +hg18_esp6500si_all.txt.idx +hg18_esp6500si_ea.txt +hg18_esp6500si_ea.txt.idx +hg18_example_db_generic.txt +hg18_example_db_gff3.txt +hg18_genomicSuperDups.txt +hg18_gerp++gt2.txt +hg18_gerp++gt2.txt.idx +hg18_gwasCatalog.txt +hg18_kgXref.txt +hg18_knownGene.txt +hg18_knownGeneMrna.fa +hg18_ljb2_fathmm.txt +hg18_ljb2_fathmm.txt.idx +hg18_ljb2_gerp++.txt +hg18_ljb2_gerp++.txt.idx +hg18_ljb2_ma.txt +hg18_ljb2_ma.txt.idx +hg18_ljb2_mt.txt +hg18_ljb2_mt.txt.idx +hg18_ljb2_phylop.txt +hg18_ljb2_phylop.txt.idx +hg18_ljb2_pp2hdiv.txt 
+hg18_ljb2_pp2hdiv.txt.idx +hg18_ljb2_pp2hvar.txt +hg18_ljb2_pp2hvar.txt.idx +hg18_ljb2_sift.txt +hg18_ljb2_sift.txt.idx +hg18_ljb2_siphy.txt +hg18_ljb2_siphy.txt.idx +hg18_phastConsElements44way.txt +hg18_refGene.txt +hg18_refGeneMrna.fa +hg18_refLink.txt +hg18_snp128.txt +hg18_snp128.txt.idx +hg18_snp128NonFlagged.txt +hg18_snp128NonFlagged.txt.idx +hg18_snp129.txt +hg18_snp129.txt.idx +hg18_snp129NonFlagged.txt +hg18_snp129NonFlagged.txt.idx +hg18_snp130.txt +hg18_snp130.txt.idx +hg18_snp130NonFlagged.txt +hg18_snp130NonFlagged.txt.idx +hg18_snp131.txt +hg18_snp131.txt.idx +hg18_snp131NonFlagged.txt +hg18_snp131NonFlagged.txt.idx +hg18_snp132.txt +hg18_snp132.txt.idx +hg18_snp132NonFlagged.txt +hg18_snp132NonFlagged.txt.idx +hg18_tfbsConsSites.txt +hg19_AFR.sites.2012_04.txt +hg19_AFR.sites.2012_04.txt.idx +hg19_ALL.sites.2010_11.txt +hg19_ALL.sites.2010_11.txt.idx +hg19_ALL.sites.2012_02.txt +hg19_ALL.sites.2012_02.txt.idx +hg19_ALL.sites.2012_04.txt +hg19_ALL.sites.2012_04.txt.idx +hg19_AMR.sites.2012_04.txt +hg19_AMR.sites.2012_04.txt.idx +hg19_ASN.sites.2012_04.txt +hg19_ASN.sites.2012_04.txt.idx +hg19_EUR.sites.2012_04.txt +hg19_EUR.sites.2012_04.txt.idx +hg19_avsift.txt +hg19_avsift.txt.idx +hg19_cg46.txt +hg19_cg46.txt.idx +hg19_cg69.txt +hg19_cg69.txt.idx +hg19_clinvar_20131105.txt +hg19_clinvar_20131105.txt.idx +hg19_cosmic61.txt +hg19_cosmic61.txt.idx +hg19_cosmic63.txt +hg19_cosmic63.txt.idx +hg19_cosmic64.txt +hg19_cosmic64.txt.idx +hg19_cosmic65.txt +hg19_cosmic65.txt.idx +hg19_cosmic67.txt +hg19_cytoBand.txt +hg19_dgvMerged.txt +hg19_ensGene.txt +hg19_ensGeneMrna.fa +hg19_esp5400_aa.txt +hg19_esp5400_aa.txt.idx +hg19_esp5400_all.txt +hg19_esp5400_all.txt.idx +hg19_esp6500_aa.txt +hg19_esp6500_aa.txt.idx +hg19_esp6500_all.txt +hg19_esp6500_all.txt.idx +hg19_esp6500_ea.txt +hg19_esp6500_ea.txt.idx +hg19_esp6500si_aa.txt +hg19_esp6500si_aa.txt.idx +hg19_esp6500si_all.txt +hg19_esp6500si_all.txt.idx +hg19_esp6500si_ea.txt +hg19_esp6500si_ea.txt.idx 
+hg19_genomicSuperDups.txt +hg19_gerp++gt2.txt +hg19_gerp++gt2.txt.idx +hg19_gwasCatalog.txt +hg19_kgXref.txt +hg19_knownGene.txt +hg19_knownGeneMrna.fa +hg19_ljb2_fathmm.txt +hg19_ljb2_fathmm.txt.idx +hg19_ljb2_gerp++.txt +hg19_ljb2_gerp++.txt.idx +hg19_ljb2_ma.txt +hg19_ljb2_ma.txt.idx +hg19_ljb2_mt.txt +hg19_ljb2_phylop.txt +hg19_ljb2_phylop.txt.idx +hg19_ljb2_pp2hdiv.txt +hg19_ljb2_pp2hdiv.txt.idx +hg19_ljb2_pp2hvar.txt +hg19_ljb2_pp2hvar.txt.idx +hg19_ljb2_sift.txt +hg19_ljb2_sift.txt.idx +hg19_ljb2_siphy.txt +hg19_nci60.txt +hg19_nci60.txt.idx +hg19_phastConsElements46way.txt +hg19_refGene.txt +hg19_refGeneMrna.fa +hg19_refLink.txt +hg19_snp130.txt +hg19_snp130.txt.idx +hg19_snp130NonFlagged.txt +hg19_snp130NonFlagged.txt.idx +hg19_snp131.txt +hg19_snp131NonFlagged.txt +hg19_snp131NonFlagged.txt.idx +hg19_snp132.txt +hg19_snp132.txt.idx +hg19_snp132NonFlagged.txt +hg19_snp132NonFlagged.txt.idx +hg19_snp135.txt +hg19_snp135NonFlagged.txt +hg19_snp135NonFlagged.txt.idx +hg19_snp137.txt +hg19_snp137NonFlagged.txt +hg19_snp137NonFlagged.txt.idx +hg19_tfbsConsSites.txt + +obsolete functional impact database files: (disabled by default) +hg18_avsift.txt +hg18_avsift.txt.idx +hg19_ljb_all.txt +hg19_ljb_all.txt.idx +hg19_ljb_lrt.txt +hg19_ljb_lrt.txt.idx +hg19_ljb_mt.txt +hg19_ljb_mt.txt.idx +hg19_ljb_phylop.txt +hg19_ljb_phylop.txt.idx +hg19_ljb_pp2.txt +hg19_ljb_pp2.txt.idx +hg18_ljb_all.txt +hg18_ljb_all.txt.idx +hg18_ljb_lrt.txt +hg18_ljb_lrt.txt.idx +hg18_ljb_mt.txt +hg18_ljb_mt.txt.idx +hg18_ljb_phylop.txt +hg18_ljb_phylop.txt.idx +hg18_ljb_pp2.txt +hg18_ljb_pp2.txt.idx diff -r 60630d56ab1f -r ed770baa1f16 README~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README~ Mon Nov 18 10:43:57 2013 -0500 @@ -0,0 +1,213 @@ +ANNOVAR needs to be installed manually in the following way: + + +1) If you already have ANNOVAR installed on your system, simply edit the tool-data/annovar.loc file to reflect locations of + the perl scripts (annotate_variation.pl and 
convert2annovar.pl) and humandb directory (directory containing the annovar database files) +1b) Restart galaxy instance for changes in .loc file to take effect + + +2) If you do not have ANNOVAR installed, request annovar download and sign license here: + http://www.openbioinformatics.org/annovar/annovar_download_form.php + + 3) Once downloaded, install annovar per the installation instructions and edit annovar.loc file to reflect location of directory containing perl scripts. + tool uses annotate_variation.pl and convert2annovar.pl + + 4) Then download all desired databases for all desired builds as follows: + annotate_variation.pl -downdb -buildver BUILD [-webfrom annovar] DATABASE HUMANDB + + where HUMANDB is the location where all database files should be stored + and DATABASE is the database file to download, e.g. refGene (see bottom of document for all available database files at the time of writing this tool) + and BUILD can be hg18 or hg19 for humans, also other organisms available. + + list of all available databases can be found here: http://www.openbioinformatics.org/annovar/annovar_db.html + + 5) edit the tool-data/annovar.loc file to reflect location of humandb folder + 5b) restart galaxy instance for changes in .loc file to take effect + +6) Tool uses cgatools join for combining of files, this should be installed automatically with repository.
If not, get a copy from Complete Genomics directly: + wget http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz + tar xvzf cgatools-1.7.1.5-linux_binary-x86_64.tar.gz + + and place the "cgatools" binary found in bin/ directory on your $PATH + + +list of files in my own humandb folder: + + hg18_ALL.sites.2012_04.txt + hg18_ALL.sites.2012_04.txt.idx + hg18_avsift.txt + hg18_avsift.txt.idx + hg18_CEU.sites.2010_07.txt + hg18_CEU.sites.2010_07.txt.idx + hg18_cg46.txt + hg18_cg46.txt.idx + hg18_cg69.txt + hg18_cg69.txt.idx + hg18_cytoBand.txt + hg18_dgv.txt + hg18_ensGeneMrna.fa + hg18_ensGene.txt + hg18_esp5400_aa.txt + hg18_esp5400_aa.txt.idx + hg18_esp5400_all.txt + hg18_esp5400_all.txt.idx + hg18_esp5400_ea.txt + hg18_esp5400_ea.txt.idx + hg18_esp6500_aa.txt + hg18_esp6500_aa.txt.idx + hg18_esp6500_all.txt + hg18_esp6500_all.txt.idx + hg18_esp6500_ea.txt + hg18_esp6500_ea.txt.idx + hg18_esp6500si_aa.txt + hg18_esp6500si_aa.txt.idx + hg18_esp6500si_all.txt + hg18_esp6500si_all.txt.idx + hg18_esp6500si_ea.txt + hg18_esp6500si_ea.txt.idx + hg18_example_db_generic.txt + hg18_example_db_gff3.txt + hg18_genomicSuperDups.txt + hg18_gerp++gt2.txt + hg18_gerp++gt2.txt.idx + hg18_gwasCatalog.txt + hg18_JPTCHB.sites.2010_07.txt + hg18_JPTCHB.sites.2010_07.txt.idx + hg18_keggMapDesc.txt + hg18_keggPathway.txt + hg18_kgXref.txt + hg18_knownGeneMrna.fa + hg18_knownGene.txt + hg18_ljb_all.txt + hg18_ljb_all.txt.idx + hg18_ljb_lrt.txt + hg18_ljb_lrt.txt.idx + hg18_ljb_mt.txt + hg18_ljb_mt.txt.idx + hg18_ljb_phylop.txt + hg18_ljb_phylop.txt.idx + hg18_ljb_pp2.txt + hg18_ljb_pp2.txt.idx + hg18_ljb_sift.txt + hg18_ljb_sift.txt.idx + hg18_phastConsElements44way.txt + hg18_refGeneMrna.fa + hg18_refGene.txt + hg18_refLink.txt + hg18_snp128NonFlagged.txt + hg18_snp128NonFlagged.txt.idx + hg18_snp128.txt + hg18_snp128.txt.idx + hg18_snp129NonFlagged.txt + hg18_snp129NonFlagged.txt.idx + hg18_snp129.txt + hg18_snp129.txt.idx + 
hg18_snp130NonFlagged.txt + hg18_snp130NonFlagged.txt.idx + hg18_snp130.txt + hg18_snp130.txt.idx + hg18_snp131NonFlagged.txt + hg18_snp131NonFlagged.txt.idx + hg18_snp131.txt + hg18_snp131.txt.idx + hg18_snp132NonFlagged.txt + hg18_snp132NonFlagged.txt.idx + hg18_snp132.txt + hg18_snp132.txt.idx + hg18_tfbsConsSites.txt + hg18_YRI.sites.2010_07.txt + hg18_YRI.sites.2010_07.txt.idx + hg19_AFR.sites.2012_04.txt + hg19_AFR.sites.2012_04.txt.idx + hg19_ALL.sites.2010_11.txt + hg19_ALL.sites.2010_11.txt.idx + hg19_ALL.sites.2012_02.txt + hg19_ALL.sites.2012_02.txt.idx + hg19_ALL.sites.2012_04.txt + hg19_ALL.sites.2012_04.txt.idx + hg19_AMR.sites.2012_04.txt + hg19_AMR.sites.2012_04.txt.idx + hg19_ASN.sites.2012_04.txt + hg19_ASN.sites.2012_04.txt.idx + hg19_avsift.txt + hg19_avsift.txt.idx + hg19_cg46.txt + hg19_cg46.txt.idx + hg19_cg69.txt + hg19_cg69.txt.idx + hg19_cosmic61.txt + hg19_cosmic61.txt.idx + hg19_cosmic63.txt + hg19_cosmic63.txt.idx + hg19_cosmic64.txt + hg19_cosmic64.txt.idx + hg19_cosmic65.txt + hg19_cosmic65.txt.idx + hg19_cytoBand.txt + hg19_dgv.txt + hg19_ensGeneMrna.fa + hg19_ensGene.txt + hg19_esp5400_aa.txt + hg19_esp5400_aa.txt.idx + hg19_esp5400_all.txt + hg19_esp5400_all.txt.idx + hg19_esp5400_ea.txt + hg19_esp5400_ea.txt.idx + hg19_esp6500_aa.txt + hg19_esp6500_aa.txt.idx + hg19_esp6500_all.txt + hg19_esp6500_all.txt.idx + hg19_esp6500_ea.txt + hg19_esp6500_ea.txt.idx + hg19_esp6500si_aa.txt + hg19_esp6500si_aa.txt.idx + hg19_esp6500si_all.txt + hg19_esp6500si_all.txt.idx + hg19_esp6500si_ea.txt + hg19_esp6500si_ea.txt.idx + hg19_EUR.sites.2012_04.txt + hg19_EUR.sites.2012_04.txt.idx + hg19_genomicSuperDups.txt + hg19_gerp++gt2.txt + hg19_gerp++gt2.txt.idx + hg19_gwasCatalog.txt + hg19_keggMapDesc.txt + hg19_keggPathway.txt + hg19_kgXref.txt + hg19_knownGeneMrna.fa + hg19_knownGene.txt + hg19_ljb_all.txt + hg19_ljb_all.txt.idx + hg19_ljb_lrt.txt + hg19_ljb_lrt.txt.idx + hg19_ljb_mt.txt + hg19_ljb_mt.txt.idx + hg19_ljb_phylop.txt + 
hg19_ljb_phylop.txt.idx + hg19_ljb_pp2.txt + hg19_ljb_pp2.txt.idx + hg19_ljb_sift.txt + hg19_ljb_sift.txt.idx + hg19_phastConsElements46way.txt + hg19_refGeneMrna.fa + hg19_refGene.txt + hg19_refLink.txt + hg19_snp130NonFlagged.txt + hg19_snp130NonFlagged.txt.idx + hg19_snp130.txt + hg19_snp130.txt.idx + hg19_snp131NonFlagged.txt + hg19_snp131NonFlagged.txt.idx + hg19_snp131.txt + hg19_snp132NonFlagged.txt + hg19_snp132NonFlagged.txt.idx + hg19_snp132.txt + hg19_snp132.txt.idx + hg19_snp135NonFlagged.txt + hg19_snp135NonFlagged.txt.idx + hg19_snp135.txt + hg19_snp137NonFlagged.txt + hg19_snp137NonFlagged.txt.idx + hg19_snp137.txt + hg19_tfbsConsSites.txt + diff -r 60630d56ab1f -r ed770baa1f16 tool-data/annovar.loc.sample --- a/tool-data/annovar.loc.sample Wed Sep 04 08:02:09 2013 -0400 +++ b/tool-data/annovar.loc.sample Mon Nov 18 10:43:57 2013 -0500 @@ -1,6 +1,6 @@ #loc file for annovar tool - +# # value, dbkey, name, ANNOVAR_scripts, ANNOVAR_humandb -hg18 hg18 build 36 (hg18) /path/to/annovarscripts /path/to/humandb -hg19 hg19 build 37 (hg19) /path/to/annovarscripts /path/to/humandb +#hg18 hg18 hg18 [Human Mar. 2006 (NCBI36/hg18)] /path/to/annovarscripts /path/to/humandb +#hg19 hg19 hg19 [Human Feb. 2009 (GRCh37/hg19)] /path/to/annovarscripts /path/to/humandb diff -r 60630d56ab1f -r ed770baa1f16 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Wed Sep 04 08:02:09 2013 -0400 +++ b/tool_data_table_conf.xml.sample Mon Nov 18 10:43:57 2013 -0500 @@ -1,5 +1,7 @@ + value, dbkey, name, ANNOVAR_scripts, ANNOVAR_humandb
+
diff -r 60630d56ab1f -r ed770baa1f16 tool_data_table_conf.xml.sample~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample~ Mon Nov 18 10:43:57 2013 -0500 @@ -0,0 +1,5 @@ + + +value, dbkey, name, ANNOVAR_scripts, ANNOVAR_humandb + +
diff -r 60630d56ab1f -r ed770baa1f16 tool_dependencies.xml --- a/tool_dependencies.xml Wed Sep 04 08:02:09 2013 -0400 +++ b/tool_dependencies.xml Mon Nov 18 10:43:57 2013 -0500 @@ -1,6 +1,6 @@ - + http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz diff -r 60630d56ab1f -r ed770baa1f16 tool_dependencies.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml~ Mon Nov 18 10:43:57 2013 -0500 @@ -0,0 +1,23 @@ + + + + + + http://sourceforge.net/projects/cgatools/files/1.7.1/cgatools-1.7.1.5-linux_binary-x86_64.tar.gz + chmod a+x bin/cgatools + + bin/cgatools + $INSTALL_DIR/bin + + + $INSTALL_DIR/bin + $REPOSITORY_INSTALL_DIR + + + + + Downloads and installs the cgatools binary. + + + + diff -r 60630d56ab1f -r ed770baa1f16 tools/annovar/annovar.sh --- a/tools/annovar/annovar.sh Wed Sep 04 08:02:09 2013 -0400 +++ b/tools/annovar/annovar.sh Mon Nov 18 10:43:57 2013 -0500 @@ -1,7 +1,12 @@ #!/bin/bash test="N" +dofilter="N" +######################### +# DEFINE SOME +# FUNCTIONS +######################### function usage(){ echo "usage: $0 todo" @@ -167,7 +172,14 @@ -set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: verdbsnp: tfbs: mce: cytoband: segdup: dgv: gwas: ver1000g: cg46: cg69: impactscores: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage +################################# +# +# PARSE PARAMETERS +# +################################# + + +set -- `getopt -n$0 -u -a --longoptions="inputfile: buildver: humandb: varfile: VCF: chrcol: startcol: endcol: refcol: obscol: vartypecol: convertcoords: geneanno: hgvs: verdbsnp: tfbs: mce: cytoband: segdup: dgv: 
gwas: ver1000g: cg46: cg69: impactscores: newimpactscores: otherinfo: esp: gerp: cosmic61: cosmic63: cosmic64: cosmic65: cosmic67: clinvar: nci60: outall: outfilt: outinvalid: scriptsdir: dorunannovar: dofilter: filt_dbsnp: filt1000GALL: filt1000GAFR: filt1000GAMR: filt1000GASN: filt1000GEUR: filtESP6500ALL: filtESP6500EA: filtESP6500AA: filtcg46: filtcg69: dummy:" "h:" "$@"` || usage [ $# -eq 0 ] && usage @@ -176,8 +188,8 @@ do case "$1" in --inputfile) infile=$2;shift;; # inputfile - --buildver) buildver=$2;shift;; # hg18 or hg19 - --humandb) humandb=$2;shift;; # location of humandb database + --buildver) buildvertmp=$2;shift;; # hg18 or hg19 + --humandb) humandbtmp=$2;shift;; # location of humandb database --varfile) varfile=$2;shift;; # Y or N --VCF) vcf=$2;shift;; #Y or N --chrcol) chrcol=$2;shift;; # which column has chr @@ -188,6 +200,7 @@ --vartypecol) vartypecol=$2;shift;; # which column has vartype --convertcoords) convertcoords=$2;shift;; # Y or N convert coordinate from CG to 1-based? --geneanno) geneanno=$2;shift;; # comma-separated list of strings refSeq, knowngene, ensgene + --hgvs) hgvs=$2;shift;; --verdbsnp) verdbsnp=$2;shift;; #comma-separated list of dbsnp version to annotate with (e.g.
"132,135NonFlagged,137")" --tfbs) tfbs=$2;shift;; # Y or N --mce) mce=$2;shift;; # Y or N @@ -199,13 +212,18 @@ --cg46) cg46=$2;shift;; --cg69) cg69=$2;shift;; --impactscores) impactscores=$2;shift;; # Y or N - --scriptsdir) scriptsdir=$2;shift;; # Y or N + --newimpactscores) newimpactscores=$2;shift;; # Y or N + --otherinfo) otherinfo=$2;shift;; + --scriptsdir) scriptsdirtmp=$2;shift;; # directory containing annotate_variation.pl --esp) esp=$2;shift;; # Y or N --gerp) gerp=$2;shift;; # Y or N --cosmic61) cosmic61=$2;shift;; # Y or N --cosmic63) cosmic63=$2;shift;; # Y or N --cosmic64) cosmic64=$2;shift;; # Y or N --cosmic65) cosmic65=$2;shift;; # Y or N + --cosmic67) cosmic67=$2;shift;; # Y or N + --nci60) nci60=$2;shift;; # Y or N + --clinvar) clinvar=$2;shift;; # Y or N --filt_dbsnp) filt_dbsnp=$2;shift;; --filt1000GALL) threshold_1000g_ALL=$2;shift;; #threshold value --filt1000GAFR) threshold_1000g_AFR=$2;shift;; #threshold value @@ -220,8 +238,7 @@ --outall) outfile_all=$2;shift;; # file --outfilt) outfile_filt=$2;shift;; # file --outinvalid) outfile_invalid=$2;shift;; #file - --dorunannovar) dorunannovar=$2;shift;; #Y or N - --dofilter) dofilter=$2;shift;; #Y or N + --dorunannovar) dorunannovar=$2;shift;; #Y or N -h) shift;; --) shift;break;; -*) usage;; @@ -230,6 +247,11 @@ shift done +#galaxy sometimes passes these values as a comma-separated list after updates; keep only the part before the FIRST comma (%%,* strips the longest ",*" suffix) +humandb=${humandbtmp%%,*} +buildver=${buildvertmp%%,*} +scriptsdir=${scriptsdirtmp%%,*} + if [ $test == "Y" ] then @@ -249,6 +271,7 @@ echo "cg46: ${cg46}" echo "cg69: ${cg69}" echo "impactscores: $impactscores" + echo "newimpactscores: $newimpactscores" echo "esp: $esp" echo "gerp: $gerp" echo "cosmic: $cosmic" @@ -277,10 +300,18 @@ fi + +############################################ +# +# Annotate Variants +# +############################################ + +#parse geneanno param refgene="N" knowngene="N" ensgene="N" -#parse geneanno param + if [[ $geneanno =~ "refSeq" ]] then refgene="Y" @@ -293,7
+324,10 @@ then ensgene="Y" fi - +if [ $hgvs == "N" ] +then + hgvs="" +fi #parse verdbsnp/1000g/esp strings dbsnpstr=${verdbsnp//,/ } @@ -314,7 +348,8 @@ polyphen2="N" phylop="N" ljbsift="N" -#parse impactscores param + +#parse old impactscores param (obsolete) if [[ $impactscores =~ "mutationtaster" ]] then mutationtaster="Y" @@ -331,6 +366,10 @@ then ljbsift="Y" fi +if [[ $impactscores =~ "ljb2sift" ]] +then + ljb2sift="Y" +fi if [[ $impactscores =~ "pp2" ]] then polyphen2="Y" @@ -347,6 +386,65 @@ #ljb refers to Liu, Jian, Boerwinkle paper in Human Mutation with pubmed ID 21520341. Cite this paper if you use the scores +ljb2_sift="N" +ljb2_pp2hdiv="N" +ljb2_pp2hvar="N" +ljb2_lrt="N" +ljb2_mt="N" +ljb2_ma="N" +ljb2_fathmm="N" +ljb2_gerp="N" +ljb2_phylop="N" +ljb2_siphy="N" + +# parse ljb2 newimpactscores param +# ljb2_sift, ljb2_pp2hdiv, ljb2_pp2hvar, ljb2_lrt, ljb2_mt, ljb2_ma, ljb2_fathmm, ljb2_gerp++, ljb2_phylop, ljb2_siphy +if [[ $newimpactscores =~ "ljb2_sift" ]] +then + ljb2_sift="Y" +fi +if [[ $newimpactscores =~ "ljb2_pp2hdiv" ]] +then + ljb2_pp2hdiv="Y" +fi +if [[ $newimpactscores =~ "ljb2_pp2hvar" ]] +then + ljb2_pp2hvar="Y" +fi +if [[ $newimpactscores =~ "ljb2_lrt" ]] +then + ljb2_lrt="Y" +fi +if [[ $newimpactscores =~ "ljb2_mt" ]] +then + ljb2_mt="Y" +fi +if [[ $newimpactscores =~ "ljb2_ma" ]] +then + ljb2_ma="Y" +fi +if [[ $newimpactscores =~ "ljb2_fathmm" ]] +then + ljb2_fathmm="Y" +fi +if [[ $newimpactscores =~ "ljb2_gerp" ]] +then + ljb2_gerp="Y" +fi +if [[ $newimpactscores =~ "ljb2_phylop" ]] +then + ljb2_phylop="Y" +fi +if [[ $newimpactscores =~ "ljb2_siphy" ]] +then + ljb2_siphy="Y" +fi + +if [ $otherinfo == "N" ] +then + otherinfo="" +fi + #column header names we will be adding # ESP 6500 @@ -498,7 +596,7 @@ if [ $refgene == "Y" ] then echo -e "\nrefSeq gene" - $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype gene annovarinput $humandb 2>&1 + $scriptsdir/annotate_variation.pl --geneanno --buildver $buildver -dbtype 
gene ${hgvs} annovarinput $humandb 2>&1 annovarout=annovarinput.variant_function sed -i '1i\RefSeq_Func\tRefSeq_Gene\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout @@ -612,9 +710,9 @@ if [ $dgv == "Y" ] then echo -e "\nDGV Annotation" - $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgv annovarinput $humandb 2>&1 + $scriptsdir/annotate_variation.pl --regionanno --buildver $buildver -dbtype dgvMerged annovarinput $humandb 2>&1 - annovarout=annovarinput.${buildver}_dgv + annovarout=annovarinput.${buildver}_dgvMerged sed -i '1i\db\tDGV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout joinresults originalfile $annovarout 3 4 5 6 7 B.DGV fi @@ -807,6 +905,115 @@ fi + + + #### IMPACT SCORE ANNOTATIONS + + + if [ $ljb2_sift == "Y" ] + then + echo -e "\nLJB2 SIFT Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_sift annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_sift_dropped + sed -i '1i\db\tLJB2_SIFT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SIFT + fi + + if [ $ljb2_pp2hdiv == "Y" ] + then + echo -e "\nLJB2 pp2hdiv Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hdiv annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_pp2hdiv_dropped + sed -i '1i\db\tLJB2_PolyPhen2_HDIV\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HDIV + fi + + if [ $ljb2_pp2hvar == "Y" ] + then + echo -e "\nLJB2 pp2hvar Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_pp2hvar annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_pp2hvar_dropped + sed -i 
'1i\db\tLJB2_PolyPhen2_HVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PolyPhen2_HVAR + fi + + if [ $ljb2_lrt == "Y" ] + then + echo -e "\nLJB2 LRT Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_lrt annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_lrt_dropped + sed -i '1i\db\tLJB2_LRT\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_LRT + fi + + if [ $ljb2_mt == "Y" ] + then + echo -e "\nLJB2 mutationtaster Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_mt annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_mt_dropped + sed -i '1i\db\tLJB2_MutationTaster\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationTaster + fi + + if [ $ljb2_ma == "Y" ] + then + echo -e "\nLJB2 mutationassessor Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_ma annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_ma_dropped + sed -i '1i\db\tLJB2_MutationAssessor\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_MutationAssessor + fi + + if [ $ljb2_fathmm == "Y" ] + then + echo -e "\nLJB2 FATHMM Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_fathmm annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_fathmm_dropped + sed -i '1i\db\tLJB2_FATHMM\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_FATHMM + fi + + if [ $ljb2_gerp == "Y" ] + then + echo -e "\nLJB2 GERP++ Annotation" + 
$scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_gerp++ annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_gerp++_dropped + sed -i '1i\db\tLJB2_GERP++\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_GERP++ + fi + + if [ $ljb2_phylop == "Y" ] + then + echo -e "\nLJB2 PhyloP Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_phylop annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_phylop_dropped + sed -i '1i\db\tLJB2_PhyloP\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_PhyloP + fi + + if [ $ljb2_siphy == "Y" ] + then + echo -e "\nLJB2 SiPhy Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver $otherinfo -dbtype ljb2_siphy annovarinput $humandb 2>&1 + + annovarout=annovarinput.${buildver}_ljb2_siphy_dropped + sed -i '1i\db\tLJB2_SiPhy\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.LJB2_SiPhy + fi + + + + ### OLD IMPACT SCORE ANNOTATIONS + # SIFT if [ $avsift == "Y" ] then @@ -822,7 +1029,7 @@ # SIFT2 if [ $ljbsift == "Y" ] then - echo -e "\nSIFT Annotation" + echo -e "\nLJB SIFT Annotation" $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype ljb_sift annovarinput $humandb 2>&1 annovarout=annovarinput.${buildver}_ljb_sift_dropped @@ -984,35 +1191,6 @@ fi - #ESP6500 - if [ $esp == "Y" ] - then - echo -e "\nESP Annotation OLD" - # ALL - $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_all annovarinput $humandb 2>&1 - - annovarout=annovarinput.${buildver}_esp6500si_all_dropped - sed -i '1i\db\t'$esp6500_colheader_ALL'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout - joinresults originalfile 
$annovarout 3 4 5 6 7 B.$esp6500_colheader_ALL - - - # European American - $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_ea annovarinput $humandb 2>&1 - - annovarout=annovarinput.${buildver}_esp6500si_ea_dropped - sed -i '1i\db\t'$esp6500_colheader_EA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout - joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_EA - - # African Americans - $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype esp6500si_aa annovarinput $humandb 2>&1 - - annovarout=annovarinput.${buildver}_esp6500si_aa_dropped - sed -i '1i\db\t'$esp6500_colheader_AA'\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout - joinresults originalfile $annovarout 3 4 5 6 7 B.$esp6500_colheader_AA - fi - - - #GERP++ if [ $gerp == "Y" ] then @@ -1070,6 +1248,39 @@ fi + if [[ $cosmic67 == "Y" && $buildver == "hg19" ]] + then + echo -e "\nCOSMIC67 Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cosmic67 annovarinput $humandb 2>&1 + + annovarout="annovarinput.${buildver}_cosmic67_dropped" + sed -i '1i\db\tCOSMIC67\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.COSMIC67 + + fi + + if [[ $clinvar == "Y" && $buildver == "hg19" ]] + then + echo -e "\nCLINVAR Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype clinvar_20131105 annovarinput $humandb 2>&1 + + annovarout="annovarinput.${buildver}_clinvar_20131105_dropped" + sed -i '1i\db\tCLINVAR\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.CLINVAR + + fi + + if [[ $nci60 == "Y" && $buildver == "hg19" ]] + then + echo -e "\nNCI60 Annotation" + $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype nci60 annovarinput $humandb 2>&1 + + 
annovarout="annovarinput.${buildver}_nci60_dropped" + sed -i '1i\db\tNCI60\tchromosome\tstart\tend\treference\talleleSeq"'"$vcfheader"'"' $annovarout + joinresults originalfile $annovarout 3 4 5 6 7 B.NCI60 + + fi + #cg46 if [[ $cg46 == "Y" ]] then @@ -1138,52 +1349,6 @@ -############################################ -# -# Filter Annotated Variants -# -############################################ - - -if [[ $dofilter == "Y" ]] -then - echo "starting filtering" - cp originalfile filteredfile - - ### do the filtering - # usage: runfilter (-1=do not filter, 0=filter any value) - - #1000genomes - runfilter filteredfile ${g1000_colheader_ALL} ${threshold_1000g_ALL} - runfilter filteredfile ${g1000_colheader_AFR} ${threshold_1000g_AFR} - runfilter filteredfile ${g1000_colheader_AMR} ${threshold_1000g_AMR} - runfilter filteredfile ${g1000_colheader_ASN} ${threshold_1000g_ASN} - runfilter filteredfile ${g1000_colheader_EUR} ${threshold_1000g_EUR} - - #esp - runfilter filteredfile ${esp6500_colheader_ALL} ${threshold_ESP6500_ALL} - runfilter filteredfile ${esp6500_colheader_EA} ${threshold_ESP6500_EA} - runfilter filteredfile ${esp6500_colheader_AA} ${threshold_ESP6500_AA} - - #dbsnp - for version in $filt_dbsnpstr - do - if [ $version == "None" ] - then - break - fi - runfilter filteredfile "db$version" "text" #-42 will filter any non-empty string in that field - - done - - #complete genomics - runfilter filteredfile ${cg46_colheader} ${threshold_cg46} - runfilter filteredfile ${cg69_colheader} ${threshold_cg69} - - #move filtered output file to galaxy output file - cp filteredfile $outfile_filt - -fi @@ -1201,3 +1366,4 @@ + diff -r 60630d56ab1f -r ed770baa1f16 tools/annovar/annovar.xml --- a/tools/annovar/annovar.xml Wed Sep 04 08:02:09 2013 -0400 +++ b/tools/annovar/annovar.xml Mon Nov 18 10:43:57 2013 -0500 @@ -1,19 +1,19 @@ - + Annotate a file using ANNOVAR - cgatools17 + cgatools annovar.sh - --impactscores ${impactscores} --esp ${esp} --gerp ${gerp} --cosmic61 
${cosmic61} --cosmic63 ${cosmic63} --cosmic64 ${cosmic64} - --cosmic65 ${cosmic65} + --cosmic65 ${cosmic65} + --cosmic67 ${cosmic67} --outall ${annotated} --outinvalid ${invalid} --dorunannovar ${dorun} @@ -57,6 +57,10 @@ --cg46 ${cgfortysix} --cg69 ${cgsixtynine} --ver1000g ${ver1000g} + --hgvs ${hgvs} + --otherinfo ${otherinfo} + --newimpactscores ${newimpactscores} + --clinvar ${clinvar} @@ -98,7 +102,8 @@ - + + @@ -154,14 +159,32 @@ + + + - + + + + + + + + + + + + + + + @@ -196,13 +219,23 @@ Input Formats may be one of the following: - VCF file - - Complete Genomics varfile +VCF file +Complete Genomics varfile + +Custom tab-delimited file (specify chromosome, start, end, reference allele, observed allele columns) - Custom tab-delimited file (specify chromosome, start, end, reference allele, observed allele columns) +Custom tab-delimited CG-derived file (specify chromosome, start, end, reference allele, observed allele, varType columns) + + +**Database Notes** - Custom tab-delimited CG-derived file (specify chromosome, start, end, reference allele, observed allele, varType columns) +See the ANNOVAR website for extensive documentation; a few notes on some of the databases: + +**LJB2 Database** + +PolyPhen2 HVAR should be used for diagnostics of Mendelian diseases, which requires distinguishing mutations with drastic effects from all the remaining human variation, including abundant mildly deleterious alleles. The authors recommend calling probably damaging if the score is between 0.909 and 1, and possibly damaging if the score is between 0.447 and 0.908, and benign if the score is between 0 and 0.446. + +PolyPhen2 HDIV should be used when evaluating rare alleles at loci potentially involved in complex phenotypes, dense mapping of regions identified by genome-wide association studies, and analysis of natural selection from sequence data. 
The authors recommend calling probably damaging if the score is between 0.957 and 1, and possibly damaging if the score is between 0.453 and 0.956, and benign if the score is between 0 and 0.452.