# HG changeset patch
# User iuc
# Date 1451402387 18000
# Node ID b5207530f991cc5d4a5d385bd2370fcf3774e7dd
# Parent 27ce3de83007637331a80c4eb2060c34d05bac99
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_gemini_downloader commit e88029bb12e5262687267293f9d2a694eb00d3f0-dirty
diff -r 27ce3de83007 -r b5207530f991 data_manager/data_manager_gemini_download.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_gemini_download.py Tue Dec 29 10:19:47 2015 -0500
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import json
+import shlex
+import datetime
+import subprocess
+
+def main():
+    """Download the GEMINI annotation data and emit its 'gemini_databases' data table entry.
+
+    Reads the JSON parameter file named by sys.argv[1], runs "gemini update --dataonly" into
+    the output's extra_files_path, then overwrites that same file with the new table entry.
+    Raises subprocess.CalledProcessError if gemini exits non-zero; OSError if mkdir fails.
+    """
+    today = datetime.date.today().isoformat()
+    with open(sys.argv[1]) as handle:  # closed before the same path is rewritten below
+        params = json.load(handle)
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
+    # gerp_bp/cadd presumably carry optional extra gemini flags (may be empty) - TODO confirm.
+    extras = '%s %s' % (params['param_dict']['gerp_bp'], params['param_dict']['cadd'])
+    # Argument list instead of shell=True so the directory path is never parsed by a shell.
+    subprocess.check_call(['gemini', '--annotation-dir', target_directory, 'update', '--dataonly'] + shlex.split(extras))
+    entry = {'value': today, 'dbkey': 'hg19', 'name': 'GEMINI annotations (%s)' % today, 'path': './%s' % today}
+    with open(sys.argv[1], 'w') as out:  # text mode: json.dumps returns str, not bytes
+        out.write(json.dumps({'data_tables': {'gemini_databases': [entry]}}))
+
+if __name__ == "__main__":
+    main()
+
diff -r 27ce3de83007 -r b5207530f991 data_manager/data_manager_gemini_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_gemini_download.xml Tue Dec 29 10:19:47 2015 -0500
@@ -0,0 +1,36 @@
+
+ Download a new database
+
+ gemini
+
+
+
+
+
+
+ data_manager_gemini_download.py "$out_file"
+
+
+
+
+
+
+
+
+
+
+
+
+This tool downloads the GEMINI databases.
+
+For details about this tool, please go to http://gemini.readthedocs.org
+
+
+
+ 10.1371/journal.pcbi.1003153
+
+
+
+
diff -r 27ce3de83007 -r b5207530f991 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Tue Dec 29 10:19:47 2015 -0500
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
diff -r 27ce3de83007 -r b5207530f991 gemini_annotate.xml
--- a/gemini_annotate.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-
- adding your own custom annotations
-
- gemini_macros.xml
- annotate
-
-
-
-
-
- tabixed.gz;
-tabix -p bed tabixed.gz;
-
- gemini @BINARY@
- -f tabixed.gz
- -c $column_name
- -a $a.a_selector
- #if $a.a_selector == 'extract':
- -t $a.column_type
- -e $a.column_extracts
- -o $a.operation
- #end if
-
- "${ infile }"
- > "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-It is inevitable that researchers will want to enhance the gemini framework with their own, custom annotations. gemini provides a sub-command called annotate for exactly this purpose.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_autosomal_recessive.xml
--- a/gemini_autosomal_recessive.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-
- Find variants meeting an autosomal recessive/dominant model
-
- gemini_macros.xml
-
-
-
-
-
- 0:
- --min-kindreds $min_kindreds
- #end if
-
- "${ infile }"
- > "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, one can leverage a
-built-in tool for identifying variants that meet an autosomal recessive or dominant inheritance pattern.
-The reported variants will be restricted to those variants having the potential to impact the function of affecting protein coding transcripts.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_burden.xml
--- a/gemini_burden.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-
- perform sample-wise gene-level burden calculations
-
- gemini_macros.xml
- burden
-
-
-
-
-
-= 0.0:
- --min-aaf $min_aaf
- #end if
- #if float( str($max_aaf) ) >= 0.0:
- --max-aaf $max_aaf
- #end if
- "${ infile }"
- > "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-The burden tool provides a set of utilities to perform burden summaries on a per-gene, per sample basis.
-By default, it outputs a table of gene-wise counts of all high impact variants in coding regions for each sample.
-
-$ gemini burden test.burden.db
-gene M10475 M10478 M10500 M128215
-WDR37 2 2 2 2
-CTBP2 0 0 0 1
-DHODH 1 0 0 0
-
-@CITATION@
-
-
- 10.1371/journal.pgen.1001322
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_comp_hets.xml
--- a/gemini_comp_hets.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-
- Identifying potential compound heterozygotes
-
- gemini_macros.xml
- comp_hets
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Many recessive disorders are caused by compound heterozygotes. Unlike canonical recessive sites where the same recessive allele is
-inherited from both parents at the _same_ site in the gene, compound heterozygotes occur when the individual’s phenotype is caused
-by two heterozygous recessive alleles at _different_ sites in a particular gene.
-
-So basically, we are looking for two (typically loss-of-function (LoF)) heterozygous variants impacting the same gene at different loci.
-The complicating factor is that this is _recessive_ and as such, we must also require that the consequential alleles at each heterozygous
-site were inherited on different chromosomes (one from each parent). As such, in order to use this tool, we require that all variants are phased.
-Once this has been done, the comp_hets tool will provide a report of candidate compound heterozygotes for each sample/gene.
-
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_db_info.xml
--- a/gemini_db_info.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-
- List the gemini database tables and columns
-
- gemini_macros.xml
- db_info
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Because of the sheer number of annotations that are stored in gemini, there are admittedly too many columns to remember by rote.
-If you can’t recall the name of particular column, just use the db_info tool. It will report all of the tables and all of the columns / types in each table.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_de_novo.xml
--- a/gemini_de_novo.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-
- Identifying potential de novo mutations
-
- gemini_macros.xml
- de_novo
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI,
-you can use this tool for identifying de novo (a.k.a spontaneous) mutations that arise in offspring.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_interactions.xml
--- a/gemini_interactions.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-
- Find genes among variants that are interacting partners
-
- gemini_macros.xml
- interactions
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Integrating the knowledge of the known protein-protein interactions would be useful in explaining variation data.
-Meaning to say that a damaging variant in an interacting partner of a potential protein may be equally interesting as the
-protein itself. We have used the HPRD_ binary interaction data to build a p-p network graph which can be explored by GEMINI.
-
-.. _HPRD: http://www.ncbi.nlm.nih.gov/pubmed/18988627
-
-
-@CITATION@
-
-
- 10.1093/nar/gkn892
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_load.xml
--- a/gemini_load.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-
- Loading a VCF file into GEMINI
-
- gemini_macros.xml
- load
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Before we can use GEMINI to explore genetic variation, we must first load our VCF file into the GEMINI database framework.
-We expect you to have first annotated the functional consequence of each variant in your VCF using either VEP or snpEff.
-
-http://gemini.readthedocs.org/en/latest/content/loading.html
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_lof_sieve.xml
--- a/gemini_lof_sieve.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-
- Filter LoF variants by transcript position and type
-
- gemini_macros.xml
- lof_sieve
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Not all candidate LoF variants are created equal. For e.g, a nonsense (stop gain) variant impacting the first 5% of a polypeptide is far
-more likely to be deleterious than one affecting the last 5%. Assuming you’ve annotated your VCF with snpEff v3.0+, the lof_sieve tool
-reports the fractional position (e.g. 0.05 for the first 5%) of the mutation in the amino acid sequence.
-In addition, it also reports the predicted function of the transcript so that one can segregate candidate
-LoF variants that affect protein_coding transcripts from processed RNA, etc.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_macros.xml
--- a/gemini_macros.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-
-
-
- gemini
- grabix
- tabix
- samtools
- bedtools
-
-
-
-
-
- gemini --version
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- #if str($filter.filter_selector) == 'yes' and $filter.filter:
- #import pipes
- --filter ${ pipes.quote( str( $filter.filter ) ) or "''" }
- #end if
-
-
-
-
-
-
-
-
- 0.10.1
-
- ------
-
-**Citation**
-
-If you use GEMINI in your research, please cite the following manuscript:
-
-
-
-
- 10.1371/journal.pcbi.1003153
-
-
-
-
-
-
-
-
-
- value is not None and value.metadata.gemini_version == "@VERSION@"
-
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_pathways.xml
--- a/gemini_pathways.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-
- Map genes and variants to KEGG pathways
-
- gemini_macros.xml
- pathways
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-Mapping genes to biological pathways is useful in understanding the function/role played by a gene.
-Likewise, genes involved in common pathways is helpful in understanding heterogeneous diseases.
-We have integrated the KEGG pathway mapping for gene variants, to explain/annotate variation.
-
-This requires your VCF be annotated with either snpEff/VEP.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_query.xml
--- a/gemini_query.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-
- Querying the GEMINI database
-
- gemini_macros.xml
- query
-
-
-
-
-
- 0:
- --min-kindreds $min_kindreds
- #end if
- ##--format FORMAT Format of output (JSON, TPED or default) # we will take default for the time being
- ## --sample-delim STRING The delimiter to be used with the --show-samples option.
-
- #if $q.strip():
- -q "${q}"
- #end if
-
- "${ infile }"
- > "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-The real power in the GEMINI framework lies in the fact that all of your genetic variants have been stored in a convenient database in the context of a wealth of genome annotations that facilitate variant interpretation.
-The expressive power of SQL allows one to pose intricate questions of one’s variation data. This tool offers you an easy way to query your variants!
-
-http://gemini.readthedocs.org/en/latest/content/querying.html
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_region.xml
--- a/gemini_region.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-
- Extracting variants from specific regions or genes
-
- gemini_macros.xml
- region
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-One often is concerned with variants found solely in a particular gene or genomic region.
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_roh.xml
--- a/gemini_roh.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-
- Identifying runs of homozygosity
-
- gemini_macros.xml
- roh
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-===========================================================================
-``ROH``: Identifying runs of homozygosity
-===========================================================================
-Runs of homozygosity are long stretches of homozygous genotypes that reflect
-segments shared identically by descent and are a result of consanguinity or
-natural selection. Consanguinity elevates the occurrence of rare recessive
-diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious
-mutations. Hence, the identification of these runs holds medical value.
-
-The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data.
-The tool basically looks at every homozygous position on the chromosome as a possible
-start site for the run and looks for those that could give rise to a potentially long
-stretch of homozygous genotypes.
-
-For e.g. for the given example allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u)
-the possible roh runs (H) would be:
-
-
-::
-
- genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
- roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H
- roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H
- roh_run3 = H H H H H u H H H H H H H h H H H H H
- roh_run4 = H H H H H H H h H H H H H
-
-roh returned for --min-snps = 20 would be:
-
-::
-
- roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H
- roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H
-
-
-As you can see, the immediate homozygous position right of a break (h or u) would be the possible
-start of a new roh run and genotypes to the left of a break are pruned since they cannot
-be part of a longer run than we have seen before.
-
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_stats.xml
--- a/gemini_stats.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-
- Compute useful variant statistics
-
- gemini_macros.xml
- stats
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-The stats tool computes some useful variant statistics for a GEMINI database.
-
-
-$ gemini stats --summarize "select * from variants where in_dbsnp=1 and chrom='chr1'" my.db
-sample total num_het num_hom_alt
-M10475 1 1 0
-M128215 1 1 0
-M10478 2 2 0
-M10500 2 1 1
-
-
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 gemini_windower.xml
--- a/gemini_windower.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-
- Conducting analyses on genome "windows"
-
- gemini_macros.xml
- windower
-
-
-
-
-
- "${ outfile }"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**What it does**
-
-It computs variation metrics across genomic windows (both fixed and sliding).
-
-@CITATION@
-
-
-
diff -r 27ce3de83007 -r b5207530f991 readme.rst
--- a/readme.rst Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-=========================
-Galaxy wrapper for GEMINI
-=========================
-
-
-GEMINI: a flexible framework for exploring genome variation
-
-GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of
-the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes,
-and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very
-powerful system for exploring genetic variation for for disease and population genetics.
-
-Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically
-annotated by comparing it to several genome annotations from source such as ENCODE tracks, UCSC tracks,
-OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in portable SQLite database that allows
-one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an
-enhanced SQL engine.
-
-Please also see the original [manuscript](http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153).
-
-
-============
-Installation
-============
-
-It is recommended to install this wrapper via the `Galaxy Tool Shed`.
-
-.. _`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
-
-
-=======
-History
-=======
-- 0.9.1: Initial public release
-
-
-====================
-Detailed description
-====================
-
-View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
-
-
-===============================
-Wrapper Licence (MIT/BSD style)
-===============================
-
-Permission to use, copy, modify, and distribute this software and its
-documentation with or without modifications and for any purpose and
-without fee is hereby granted, provided that any copyright notices
-appear in all copies and that both those copyright notices and this
-permission notice appear in supporting documentation, and that the
-names of the contributors or copyright holders not be used in
-advertising or publicity pertaining to distribution of the software
-without specific prior permission.
-
-THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
-WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
-OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-OR PERFORMANCE OF THIS SOFTWARE.
-
diff -r 27ce3de83007 -r b5207530f991 repository_dependencies.xml
--- a/repository_dependencies.xml Fri Oct 16 13:55:09 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-
-
-
-
diff -r 27ce3de83007 -r b5207530f991 tool-data/gemini_databases.loc.sample
--- a/tool-data/gemini_databases.loc.sample Fri Oct 16 13:55:09 2015 -0400
+++ b/tool-data/gemini_databases.loc.sample Tue Dec 29 10:19:47 2015 -0500
@@ -1,3 +1,3 @@
## GEMINI databases
-#Version dbkey Description Path
-#08_08_2014 hg19 Database (08-08-2014) /path/to/data
+#Version dbkey Description
+#08_08_2014 hg19 Database (08-08-2014)
diff -r 27ce3de83007 -r b5207530f991 tool_dependencies.xml
--- a/tool_dependencies.xml Fri Oct 16 13:55:09 2015 -0400
+++ b/tool_dependencies.xml Tue Dec 29 10:19:47 2015 -0500
@@ -1,18 +1,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+