# HG changeset patch
# User iuc
# Date 1451402387 18000
# Node ID b5207530f991cc5d4a5d385bd2370fcf3774e7dd
# Parent 27ce3de83007637331a80c4eb2060c34d05bac99
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_gemini_downloader commit e88029bb12e5262687267293f9d2a694eb00d3f0-dirty

diff -r 27ce3de83007 -r b5207530f991 data_manager/data_manager_gemini_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_gemini_download.py Tue Dec 29 10:19:47 2015 -0500
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+"""Galaxy data manager: download the GEMINI annotation data."""
+
+import sys
+import os
+import json
+import shlex
+import datetime
+import subprocess
+
+
+def main():
+    """Download the GEMINI annotation data and describe it for Galaxy.
+
+    Reads the JSON parameter file named by sys.argv[1], runs
+    ``gemini update --dataonly`` into the output's extra_files_path, then
+    overwrites that same JSON file with a ``gemini_databases`` table entry.
+
+    Raises subprocess.CalledProcessError if gemini exits non-zero.
+    """
+    today = datetime.date.today().isoformat()
+    with open(sys.argv[1]) as handle:
+        params = json.load(handle)
+    target_directory = params['output_data'][0]['extra_files_path']
+    if not os.path.isdir(target_directory):
+        os.makedirs(target_directory)
+
+    # Build an argument list instead of a shell string so a directory name
+    # containing spaces or shell metacharacters cannot alter the command.
+    # shlex.split keeps multi-word option values as separate arguments.
+    cmd = ['gemini', '--annotation-dir', target_directory,
+           'update', '--dataonly']
+    for option in ('gerp_bp', 'cadd'):
+        cmd.extend(shlex.split(str(params['param_dict'][option])))
+    subprocess.check_call(cmd)
+
+    data_manager_dict = {
+        'data_tables': {
+            'gemini_databases': [{
+                'value': today,
+                'dbkey': 'hg19',
+                'name': 'GEMINI annotations (%s)' % today,
+                'path': './%s' % today,
+            }]
+        }
+    }
+
+    # Save the table entry to the JSON file. Text mode is required because
+    # json.dumps returns str, which a binary handle rejects on Python 3.
+    with open(sys.argv[1], 'w') as out:
+        out.write(json.dumps(data_manager_dict))
+
+
+if __name__ == "__main__":
+    main()
diff -r 27ce3de83007 -r b5207530f991 data_manager/data_manager_gemini_download.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_gemini_download.xml Tue Dec 29 10:19:47 2015 -0500 @@ -0,0 +1,36 @@ + + Download a new database + + gemini + + + + + + + data_manager_gemini_download.py "$out_file" + + + + + + + + + + + + +This tool downloads the GEMINI databases. 
+ +For details about this tool, please go to http://gemini.readthedocs.org + + + + 10.1371/journal.pcbi.1003153 + + + + diff -r 27ce3de83007 -r b5207530f991 data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Tue Dec 29 10:19:47 2015 -0500 @@ -0,0 +1,21 @@ + + + + + + + + + + + gemini/data/${dbkey}/${value} + + ${GALAXY_DATA_MANAGER_DATA_PATH}/gemini/data/${dbkey}/${value}/gemini/data/ + abspath + + + + + + + diff -r 27ce3de83007 -r b5207530f991 gemini_annotate.xml --- a/gemini_annotate.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ - - adding your own custom annotations - - gemini_macros.xml - annotate - - - - - - tabixed.gz; -tabix -p bed tabixed.gz; - - gemini @BINARY@ - -f tabixed.gz - -c $column_name - -a $a.a_selector - #if $a.a_selector == 'extract': - -t $a.column_type - -e $a.column_extracts - -o $a.operation - #end if - - "${ infile }" - > "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -It is inevitable that researchers will want to enhance the gemini framework with their own, custom annotations. gemini provides a sub-command called annotate for exactly this purpose. - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_autosomal_recessive.xml --- a/gemini_autosomal_recessive.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ - - Find variants meeting an autosomal recessive/dominant model - - gemini_macros.xml - - - - - - 0: - --min-kindreds $min_kindreds - #end if - - "${ infile }" - > "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, one can leverage a -built-in tool for identifying variants that meet an autosomal recessive or dominant inheritance pattern. 
-The reported variants will be restricted to those variants having the potential to impact the function of affecting protein coding transcripts. - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_burden.xml --- a/gemini_burden.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ - - perform sample-wise gene-level burden calculations - - gemini_macros.xml - burden - - - - - -= 0.0: - --min-aaf $min_aaf - #end if - #if float( str($max_aaf) ) >= 0.0: - --max-aaf $max_aaf - #end if - "${ infile }" - > "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -The burden tool provides a set of utilities to perform burden summaries on a per-gene, per sample basis. -By default, it outputs a table of gene-wise counts of all high impact variants in coding regions for each sample. - -$ gemini burden test.burden.db -gene M10475 M10478 M10500 M128215 -WDR37 2 2 2 2 -CTBP2 0 0 0 1 -DHODH 1 0 0 0 - -@CITATION@ - - - 10.1371/journal.pgen.1001322 - - diff -r 27ce3de83007 -r b5207530f991 gemini_comp_hets.xml --- a/gemini_comp_hets.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,60 +0,0 @@ - - Identifying potential compound heterozygotes - - gemini_macros.xml - comp_hets - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - -**What it does** - -Many recessive disorders are caused by compound heterozygotes. Unlike canonical recessive sites where the same recessive allele is -inherited from both parents at the _same_ site in the gene, compound heterozygotes occur when the individual’s phenotype is caused -by two heterozygous recessive alleles at _different_ sites in a particular gene. - -So basically, we are looking for two (typically loss-of-function (LoF)) heterozygous variants impacting the same gene at different loci. 
-The complicating factor is that this is _recessive_ and as such, we must also require that the consequential alleles at each heterozygous -site were inherited on different chromosomes (one from each parent). As such, in order to use this tool, we require that all variants are phased. -Once this has been done, the comp_hets tool will provide a report of candidate compound heterozygotes for each sample/gene. - - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_db_info.xml --- a/gemini_db_info.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ - - List the gemini database tables and columns - - gemini_macros.xml - db_info - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - -**What it does** - -Because of the sheer number of annotations that are stored in gemini, there are admittedly too many columns to remember by rote. -If you can’t recall the name of particular column, just use the db_info tool. It will report all of the tables and all of the columns / types in each table. - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_de_novo.xml --- a/gemini_de_novo.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ - - Identifying potential de novo mutations - - gemini_macros.xml - de_novo - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - -**What it does** - -Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, -you can use this tool for identifying de novo (a.k.a spontaneous) mutations that arise in offspring. 
- -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_interactions.xml --- a/gemini_interactions.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ - - Find genes among variants that are interacting partners - - gemini_macros.xml - interactions - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Integrating the knowledge of the known protein-protein interactions would be useful in explaining variation data. -Meaning to say that a damaging variant in an interacting partner of a potential protein may be equally interesting as the -protein itself. We have used the HPRD_ binary interaction data to build a p-p network graph which can be explored by GEMINI. - -.. _HPRD: http://www.ncbi.nlm.nih.gov/pubmed/18988627 - - -@CITATION@ - - - 10.1093/nar/gkn892 - - diff -r 27ce3de83007 -r b5207530f991 gemini_load.xml --- a/gemini_load.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ - - Loading a VCF file into GEMINI - - gemini_macros.xml - load - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Before we can use GEMINI to explore genetic variation, we must first load our VCF file into the GEMINI database framework. -We expect you to have first annotated the functional consequence of each variant in your VCF using either VEP or snpEff. - -http://gemini.readthedocs.org/en/latest/content/loading.html - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_lof_sieve.xml --- a/gemini_lof_sieve.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ - - Filter LoF variants by transcript position and type - - gemini_macros.xml - lof_sieve - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - -**What it does** - -Not all candidate LoF variants are created equal. 
For e.g, a nonsense (stop gain) variant impacting the first 5% of a polypeptide is far -more likely to be deleterious than one affecting the last 5%. Assuming you’ve annotated your VCF with snpEff v3.0+, the lof_sieve tool -reports the fractional position (e.g. 0.05 for the first 5%) of the mutation in the amino acid sequence. -In addition, it also reports the predicted function of the transcript so that one can segregate candidate -LoF variants that affect protein_coding transcripts from processed RNA, etc. - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_macros.xml --- a/gemini_macros.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ - - - - gemini - grabix - tabix - samtools - bedtools - - - - - - gemini --version - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if str($filter.filter_selector) == 'yes' and $filter.filter: - #import pipes - --filter ${ pipes.quote( str( $filter.filter ) ) or "''" } - #end if - - - - - - - - - 0.10.1 - - ------ - -**Citation** - -If you use GEMINI in your research, please cite the following manuscript: - - - - - 10.1371/journal.pcbi.1003153 - - - - - - - - - - value is not None and value.metadata.gemini_version == "@VERSION@" - - - - diff -r 27ce3de83007 -r b5207530f991 gemini_pathways.xml --- a/gemini_pathways.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ - - Map genes and variants to KEGG pathways - - gemini_macros.xml - pathways - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - -**What it does** - -Mapping genes to biological pathways is useful in understanding the function/role played by a gene. -Likewise, genes involved in common pathways is helpful in understanding heterogeneous diseases. -We have integrated the KEGG pathway mapping for gene variants, to explain/annotate variation. 
- -This requires your VCF be annotated with either snpEff/VEP. - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_query.xml --- a/gemini_query.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,110 +0,0 @@ - - Querying the GEMINI database - - gemini_macros.xml - query - - - - - - 0: - --min-kindreds $min_kindreds - #end if - ##--format FORMAT Format of output (JSON, TPED or default) # we will take default for the time being - ## --sample-delim STRING The delimiter to be used with the --show-samples option. - - #if $q.strip(): - -q "${q}" - #end if - - "${ infile }" - > "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -The real power in the GEMINI framework lies in the fact that all of your genetic variants have been stored in a convenient database in the context of a wealth of genome annotations that facilitate variant interpretation. -The expressive power of SQL allows one to pose intricate questions of one’s variation data. This tool offers you an easy way to query your variants! - -http://gemini.readthedocs.org/en/latest/content/querying.html - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_region.xml --- a/gemini_region.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ - - Extracting variants from specific regions or genes - - gemini_macros.xml - region - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -One often is concerned with variants found solely in a particular gene or genomic region. 
- -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_roh.xml --- a/gemini_roh.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - - Identifying runs of homozygosity - - gemini_macros.xml - roh - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -=========================================================================== -``ROH``: Identifying runs of homozygosity -=========================================================================== -Runs of homozygosity are long stretches of homozygous genotypes that reflect -segments shared identically by descent and are a result of consanguinity or -natural selection. Consanguinity elevates the occurrence of rare recessive -diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious -mutations. Hence, the identification of these runs holds medical value. - -The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data. -The tool basically looks at every homozygous position on the chromosome as a possible -start site for the run and looks for those that could give rise to a potentially long -stretch of homozygous genotypes. - -For e.g. 
for the given example allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u) -the possible roh runs (H) would be: - - -:: - - genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H - roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H - roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H - roh_run3 = H H H H H u H H H H H H H h H H H H H - roh_run4 = H H H H H H H h H H H H H - -roh returned for --min-snps = 20 would be: - -:: - - roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H - roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H - - -As you can see, the immediate homozygous position right of a break (h or u) would be the possible -start of a new roh run and genotypes to the left of a break are pruned since they cannot -be part of a longer run than we have seen before. - - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_stats.xml --- a/gemini_stats.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ - - Compute useful variant statistics - - gemini_macros.xml - stats - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -The stats tool computes some useful variant statistics for a GEMINI database. - - -$ gemini stats --summarize "select * from variants where in_dbsnp=1 and chrom='chr1'" my.db -sample total num_het num_hom_alt -M10475 1 1 0 -M128215 1 1 0 -M10478 2 2 0 -M10500 2 1 1 - - - -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 gemini_windower.xml --- a/gemini_windower.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ - - Conducting analyses on genome "windows" - - gemini_macros.xml - windower - - - - - - "${ outfile }" -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -It computs variation metrics across genomic windows (both fixed and sliding). 
- -@CITATION@ - - - diff -r 27ce3de83007 -r b5207530f991 readme.rst --- a/readme.rst Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -========================= -Galaxy wrapper for GEMINI -========================= - - -GEMINI: a flexible framework for exploring genome variation - -GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of -the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, -and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very -powerful system for exploring genetic variation for for disease and population genetics. - -Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically -annotated by comparing it to several genome annotations from source such as ENCODE tracks, UCSC tracks, -OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in portable SQLite database that allows -one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an -enhanced SQL engine. - -Please also see the original [manuscript](http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153). - - -============ -Installation -============ - -It is recommended to install this wrapper via the `Galaxy Tool Shed`. - -.. 
_`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini - - -======= -History -======= -- 0.9.1: Initial public release - - -==================== -Detailed description -==================== - -View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html - - -=============================== -Wrapper Licence (MIT/BSD style) -=============================== - -Permission to use, copy, modify, and distribute this software and its -documentation with or without modifications and for any purpose and -without fee is hereby granted, provided that any copyright notices -appear in all copies and that both those copyright notices and this -permission notice appear in supporting documentation, and that the -names of the contributors or copyright holders not be used in -advertising or publicity pertaining to distribution of the software -without specific prior permission. - -THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT -OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE. 
- diff -r 27ce3de83007 -r b5207530f991 repository_dependencies.xml --- a/repository_dependencies.xml Fri Oct 16 13:55:09 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - - - - diff -r 27ce3de83007 -r b5207530f991 tool-data/gemini_databases.loc.sample --- a/tool-data/gemini_databases.loc.sample Fri Oct 16 13:55:09 2015 -0400 +++ b/tool-data/gemini_databases.loc.sample Tue Dec 29 10:19:47 2015 -0500 @@ -1,3 +1,3 @@ ## GEMINI databases -#Version dbkey Description Path -#08_08_2014 hg19 Database (08-08-2014) /path/to/data +#Version dbkey Description +#08_08_2014 hg19 Database (08-08-2014) diff -r 27ce3de83007 -r b5207530f991 tool_dependencies.xml --- a/tool_dependencies.xml Fri Oct 16 13:55:09 2015 -0400 +++ b/tool_dependencies.xml Tue Dec 29 10:19:47 2015 -0500 @@ -1,18 +1,6 @@ - - - - - - - - - - - - - - + +