Mercurial > repos > iuc > gemini
changeset 17:65f742e605ec draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 344140b8df53b8b7024618bb04594607a045c03a
author | iuc |
---|---|
date | Mon, 04 May 2015 22:46:38 -0400 |
parents | ae03de7a9fee |
children | ce61d4876838 |
files | gemini_annotate.xml gemini_autosomal_recessive.xml gemini_burden.xml gemini_comp_hets.xml gemini_db_info.xml gemini_de_novo.xml gemini_interactions.xml gemini_load.xml gemini_lof_sieve.xml gemini_pathways.xml gemini_query.xml gemini_region.xml gemini_roh.xml gemini_stats.xml gemini_windower.xml |
diffstat | 15 files changed, 102 insertions(+), 102 deletions(-) [+] |
line wrap: on
line diff
--- a/gemini_annotate.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_annotate.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>adding your own custom annotations</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">annotate</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ @@ -27,7 +28,6 @@ ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> <param name="annotate_source" type="data" format="bed" label="File containing the annotations in BED format" help="(-f)"/> @@ -36,7 +36,7 @@ label="The name of the column to be added to the variant table" help="(-c)"> <sanitizer invalid_char=" "> <valid initial="string.letters,string.digits"> - <add value="_" /> + <add value="_" /> </valid> </sanitizer> </param> @@ -48,7 +48,7 @@ </param> <when value="extract"> - <param name="column_extracts" label="Column to extract information from for list annotations" + <param name="column_extracts" label="Column to extract information from for list annotations" type="data_column" data_ref="annotate_source" force_select="true" help="(-e)"/>
--- a/gemini_autosomal_recessive.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_autosomal_recessive.xml Mon May 04 22:46:38 2015 -0400 @@ -1,13 +1,14 @@ <tool id="gemini_recessive_and_dominant" name="GEMINI autosomal recessive/dominant" version="@VERSION@.0"> <description>Find variants meeting an autosomal recessive/dominant model</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ - gemini + gemini #if str($rec_or_dom) == 'recessive': ## start autosomal_recessive @@ -32,7 +33,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <param name="rec_or_dom" type="select" label="Autosomal ..." help=""> @@ -58,8 +58,8 @@ <help> **What it does** -Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, one can leverage a -built-in tool for identifying variants that meet an autosomal recessive or dominant inheritance pattern. +Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, one can leverage a +built-in tool for identifying variants that meet an autosomal recessive or dominant inheritance pattern. The reported variants will be restricted to those variants having the potential to impact the function of affecting protein coding transcripts. @CITATION@
--- a/gemini_burden.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_burden.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>perform sample-wise gene-level burden calculations</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">burden</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -26,30 +27,29 @@ ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> <param name="cases" size="30" type="text" value="" label="Space separated list of cases for association testing" help="(--cases)"/> <param name="controls" size="30" type="text" value="" label="Space separated list of controls for association testing" help="(--controls)"/> - <param name="save_tscores" type="boolean" truevalue="--save_tscores" falsevalue="" checked="False" + <param name="save_tscores" type="boolean" truevalue="--save_tscores" falsevalue="" checked="False" label="Save the permuted T-scores in the output file" help="(--save_tscores)"/> - <param name="nonsynonymous" type="boolean" truevalue="--nonsynonymous" falsevalue="" checked="False" + <param name="nonsynonymous" type="boolean" truevalue="--nonsynonymous" falsevalue="" checked="False" label="Count all nonsynonymous variants as contributing burden" help="(--nonsynonymous)"/> - <param name="calpha" type="boolean" truevalue="--calpha" falsevalue="" checked="False" + <param name="calpha" type="boolean" truevalue="--calpha" falsevalue="" checked="False" label="Run the C-alpha association test" help="(--calpha)"/> - <param name="min_aaf" type="float" value="-1" size="5" label="The min. alt. allele frequency for a variant to be included" + <param name="min_aaf" type="float" value="-1" size="5" label="The min. alt. allele frequency for a variant to be included" help="(--min-aaf)"> <!--validator type="in_range" min="0.0"/--> </param> - <param name="max_aaf" type="float" value="-1" size="5" label="The max. alt. allele frequency for a variant to be included" + <param name="max_aaf" type="float" value="-1" size="5" label="The max. alt. allele frequency for a variant to be included" help="(--max-aaf)"> <!--validator type="in_range" min="0.0"/--> </param> - <param name="permutations" type="integer" value="1000" size="10" label="Number of permutations to run for the C-alpha test" + <param name="permutations" type="integer" value="1000" size="10" label="Number of permutations to run for the C-alpha test" help="(--permutations)"> <validator type="in_range" min="0"/> </param> @@ -65,7 +65,7 @@ <help> **What it does** -The burden tool provides a set of utilities to perform burden summaries on a per-gene, per sample basis. +The burden tool provides a set of utilities to perform burden summaries on a per-gene, per sample basis. By default, it outputs a table of gene-wise counts of all high impact variants in coding regions for each sample. $ gemini burden test.burden.db
--- a/gemini_comp_hets.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_comp_hets.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Identifying potential compound heterozygotes</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">comp_hets</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -23,14 +24,13 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> <expand macro="add_header_column" /> <expand macro="column_filter" /> - <param name="only_affected" type="boolean" truevalue="--only-affected" falsevalue="" checked="False" + <param name="only_affected" type="boolean" truevalue="--only-affected" falsevalue="" checked="False" label="Report solely those compund heterozygotes impacted a sample labeled as affected" help="(--only-affected)"/> - <param name="ignore_phasing" type="boolean" truevalue="--ignore-phasing" falsevalue="" checked="False" + <param name="ignore_phasing" type="boolean" truevalue="--ignore-phasing" falsevalue="" checked="False" label="Ignore phasing when screening for compound hets" help="Candidates are inherently putative. (--ignore-phasing)"/> <expand macro="filter" /> </inputs> @@ -44,13 +44,13 @@ <help> **What it does** -Many recessive disorders are caused by compound heterozygotes. Unlike canonical recessive sites where the same recessive allele is -inherited from both parents at the _same_ site in the gene, compound heterozygotes occur when the individual’s phenotype is caused +Many recessive disorders are caused by compound heterozygotes. Unlike canonical recessive sites where the same recessive allele is +inherited from both parents at the _same_ site in the gene, compound heterozygotes occur when the individual’s phenotype is caused by two heterozygous recessive alleles at _different_ sites in a particular gene. -So basically, we are looking for two (typically loss-of-function (LoF)) heterozygous variants impacting the same gene at different loci. -The complicating factor is that this is _recessive_ and as such, we must also require that the consequential alleles at each heterozygous -site were inherited on different chromosomes (one from each parent). As such, in order to use this tool, we require that all variants are phased. +So basically, we are looking for two (typically loss-of-function (LoF)) heterozygous variants impacting the same gene at different loci. +The complicating factor is that this is _recessive_ and as such, we must also require that the consequential alleles at each heterozygous +site were inherited on different chromosomes (one from each parent). As such, in order to use this tool, we require that all variants are phased. Once this has been done, the comp_hets tool will provide a report of candidate compound heterozygotes for each sample/gene.
--- a/gemini_db_info.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_db_info.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>List the gemini database tables and columns</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">db_info</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -13,7 +14,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> </inputs> @@ -27,7 +27,7 @@ <help> **What it does** -Because of the sheer number of annotations that are stored in gemini, there are admittedly too many columns to remember by rote. +Because of the sheer number of annotations that are stored in gemini, there are admittedly too many columns to remember by rote. If you can’t recall the name of particular column, just use the db_info tool. It will report all of the tables and all of the columns / types in each table. @CITATION@
--- a/gemini_de_novo.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_de_novo.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Identifying potential de novo mutations</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">de_novo</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -21,7 +22,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> @@ -39,7 +39,7 @@ <help> **What it does** -Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, +Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, you can use this tool for identifying de novo (a.k.a spontaneous) mutations that arise in offspring. @CITATION@
--- a/gemini_interactions.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_interactions.xml Mon May 04 22:46:38 2015 -0400 @@ -1,14 +1,15 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Find genes among variants that are interacting partners</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">interactions</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ - gemini + gemini --annotation-dir ${annotation_databases.fields.path} #if $gene.gene_selector == 'lof': ## lof interactions is a separate program @@ -25,7 +26,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" />
--- a/gemini_load.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_load.xml Mon May 04 22:46:38 2015 -0400 @@ -1,16 +1,17 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Loading a VCF file into GEMINI</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">load</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ ln -s "${ infile }" input.vcf && - gemini + gemini --annotation-dir ${annotation_databases.fields.path} @BINARY@ -v input.vcf @@ -33,7 +34,6 @@ "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <param name="infile" type="data" format="vcf" label="VCF file to be loaded in the GEMINI database" help="Only build 37 (aka hg19) of the human genome is supported."> <options> @@ -51,22 +51,22 @@ <param name="ped" type="data" format="tablar" optional="True" label="Sample information file in PED+ format" help="(-p)" /> <expand macro="annotation_dir" /> - <param name="skip_gerp_bp" type="boolean" truevalue="--skip-gerp-bp" falsevalue="" checked="False" + <param name="skip_gerp_bp" type="boolean" truevalue="--skip-gerp-bp" falsevalue="" checked="False" label="Do not load GERP scores at base pair resolution" help="(--skip-gerp-bp)"/> - <param name="skip_cadd" type="boolean" truevalue="--skip-cadd" falsevalue="" checked="False" + <param name="skip_cadd" type="boolean" truevalue="--skip-cadd" falsevalue="" checked="False" label="Do not load CADD scores" help="(--skip-cadd)"/> - <param name="skip_gene_tables" type="boolean" truevalue="--skip-gene-tables" falsevalue="" checked="False" + <param name="skip_gene_tables" type="boolean" truevalue="--skip-gene-tables" falsevalue="" checked="False" label="Do not load gene tables" help="(--skip-gene-tables)"/> - <param name="no_load_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False" + <param name="no_load_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False" label="Genotypes exist in the file, but should not be stored" help="(--no-load-genotypes)"/> - <param name="no_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False" + <param name="no_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False" label="There are no genotypes in the file" help="e.g. some 1000G VCFs (--no-genotypes)"/> - <param name="passonly" type="boolean" truevalue="--passonly" falsevalue="" checked="False" + <param name="passonly" type="boolean" truevalue="--passonly" falsevalue="" checked="False" label="Keep only variants that pass all filters" help="e.g. some 1000G VCFs (--passonly)"/> </inputs>
--- a/gemini_lof_sieve.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_lof_sieve.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Filter LoF variants by transcript position and type</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">lof_sieve</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -13,7 +14,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> </inputs> @@ -27,10 +27,10 @@ <help> **What it does** -Not all candidate LoF variants are created equal. For e.g, a nonsense (stop gain) variant impacting the first 5% of a polypeptide is far -more likely to be deleterious than one affecting the last 5%. Assuming you’ve annotated your VCF with snpEff v3.0+, the lof_sieve tool -reports the fractional position (e.g. 0.05 for the first 5%) of the mutation in the amino acid sequence. -In addition, it also reports the predicted function of the transcript so that one can segregate candidate +Not all candidate LoF variants are created equal. For e.g, a nonsense (stop gain) variant impacting the first 5% of a polypeptide is far +more likely to be deleterious than one affecting the last 5%. Assuming you’ve annotated your VCF with snpEff v3.0+, the lof_sieve tool +reports the fractional position (e.g. 0.05 for the first 5%) of the mutation in the amino acid sequence. +In addition, it also reports the predicted function of the transcript so that one can segregate candidate LoF variants that affect protein_coding transcripts from processed RNA, etc. @CITATION@
--- a/gemini_pathways.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_pathways.xml Mon May 04 22:46:38 2015 -0400 @@ -1,14 +1,15 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Map genes and variants to KEGG pathways</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">pathways</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ - gemini + gemini --annotation-dir ${annotation_databases.fields.path} @BINARY@ -v $ensembl @@ -17,16 +18,15 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> - <param name="ensembl" type="integer" value="68" size="5" label="Version of ensembl genes to use" + <param name="ensembl" type="integer" value="68" size="5" label="Version of ensembl genes to use" help="Supported versions: 66 to 71. use versions that match the VEP/snpEff versions of the annotated vcf for correctness. For e.g VEP v2.6 and snpEff v3.1 use Ensembl 68 version of the genomes. (-v)"> <validator type="in_range" min="66" max="71"/> </param> - <param name="lof" type="boolean" truevalue="--lof" falsevalue="" checked="False" + <param name="lof" type="boolean" truevalue="--lof" falsevalue="" checked="False" label="Report only pathways with loss-of-function variants" help="(--lof)"/> <expand macro="annotation_dir" /> </inputs> @@ -40,9 +40,9 @@ <help> **What it does** -Mapping genes to biological pathways is useful in understanding the function/role played by a gene. -Likewise, genes involved in common pathways is helpful in understanding heterogeneous diseases. -We have integrated the KEGG pathway mapping for gene variants, to explain/annotate variation. +Mapping genes to biological pathways is useful in understanding the function/role played by a gene. +Likewise, genes involved in common pathways is helpful in understanding heterogeneous diseases. +We have integrated the KEGG pathway mapping for gene variants, to explain/annotate variation. This requires your VCF be annotated with either snpEff/VEP.
--- a/gemini_query.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_query.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Querying the GEMINI database</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">query</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -47,7 +48,6 @@ - -carrier-summary-by-phenotype CARRIER_SUMMARY Output columns of counts of carriers and non-carriers stratified by the given sample phenotype column--> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> @@ -61,13 +61,13 @@ <expand macro="sanitize_query" /> </param> - <param name="show_samples" type="boolean" truevalue="--show-samples" falsevalue="" checked="False" + <param name="show_samples" type="boolean" truevalue="--show-samples" falsevalue="" checked="False" label="Add a column of all sample names with a variant to each variant" help="(--show-samples)"/> - <param name="show_families" type="boolean" truevalue="--show-families" falsevalue="" checked="False" + <param name="show_families" type="boolean" truevalue="--show-families" falsevalue="" checked="False" label="Add a column listing all of the families with a variant to each variant" help="(--show-families)"/> - <param name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False" + <param name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False" label="Perform the sample-filter on a family-wise basis" help="(--family-wise)"/> <expand macro="add_header_column" /> @@ -75,7 +75,7 @@ <!-- TODO: is there any default values set? --> <param name="min_kindreds" size="4" type="integer" value="-1" label="Minimum number of families for a variant passing a family-wise filter to be in" help="-1 means default values (--min-kindreds)" /> - <param name="dgidb" type="boolean" truevalue="--dgidb" falsevalue="" checked="False" + <param name="dgidb" type="boolean" truevalue="--dgidb" falsevalue="" checked="False" label="Request drug-gene interaction info from DGIdb" help="(--dgidb)"/> <param name="in" type="select" label="A variant must be in either all, none or any samples passing the sample-query filter" help="(--in)"> @@ -99,7 +99,7 @@ <help> **What it does** -The real power in the GEMINI framework lies in the fact that all of your genetic variants have been stored in a convenient database in the context of a wealth of genome annotations that facilitate variant interpretation. +The real power in the GEMINI framework lies in the fact that all of your genetic variants have been stored in a convenient database in the context of a wealth of genome annotations that facilitate variant interpretation. The expressive power of SQL allows one to pose intricate questions of one’s variation data. This tool offers you an easy way to query your variants! http://gemini.readthedocs.org/en/latest/content/querying.html
--- a/gemini_region.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_region.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Extracting variants from specific regions or genes</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">region</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -31,7 +32,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" />
--- a/gemini_roh.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_roh.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Identifying runs of homozygosity</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">roh</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -22,7 +23,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> @@ -32,7 +32,7 @@ <param name="min_total_depth" type="integer" value="20" size="10" label="The minimum overall sequencing depth requiredfor a SNP to be considered" help="default: 20 (--min-total-depth)"> <validator type="in_range" min="0"/> </param> - <param name="min_gt_depth" type="integer" value="0" size="10" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered" + <param name="min_gt_depth" type="integer" value="0" size="10" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered" help="default: 0 (--min-gt-depth)"> <validator type="in_range" min="0"/> </param> @@ -66,16 +66,16 @@ =========================================================================== Runs of homozygosity are long stretches of homozygous genotypes that reflect segments shared identically by descent and are a result of consanguinity or -natural selection. Consanguinity elevates the occurrence of rare recessive -diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious -mutations. Hence, the identification of these runs holds medical value. +natural selection. Consanguinity elevates the occurrence of rare recessive +diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious +mutations. Hence, the identification of these runs holds medical value. -The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data. +The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data. The tool basically looks at every homozygous position on the chromosome as a possible -start site for the run and looks for those that could give rise to a potentially long -stretch of homozygous genotypes. +start site for the run and looks for those that could give rise to a potentially long +stretch of homozygous genotypes. -For e.g. for the given example allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u) +For e.g. for the given example allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u) the possible roh runs (H) would be: @@ -90,13 +90,13 @@ roh returned for --min-snps = 20 would be: :: - + roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H -As you can see, the immediate homozygous position right of a break (h or u) would be the possible -start of a new roh run and genotypes to the left of a break are pruned since they cannot +As you can see, the immediate homozygous position right of a break (h or u) would be the possible +start of a new roh run and genotypes to the left of a break are pruned since they cannot be part of a longer run than we have seen before.
--- a/gemini_stats.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_stats.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Compute useful variant statistics</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">stats</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -14,7 +15,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" />
--- a/gemini_windower.xml Tue Apr 28 22:55:56 2015 -0400 +++ b/gemini_windower.xml Mon May 04 22:46:38 2015 -0400 @@ -1,11 +1,12 @@ <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <description>Conducting analyses on genome "windows"</description> - <expand macro="requirements" /> - <expand macro="version_command" /> <macros> <import>gemini_macros.xml</import> <token name="@BINARY@">windower</token> </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> <command> <![CDATA[ gemini @BINARY@ @@ -17,7 +18,6 @@ > "${ outfile }" ]]> </command> - <expand macro="stdio" /> <inputs> <expand macro="infile" /> @@ -34,12 +34,12 @@ <option value="collapse">collapse</option> </param> - <param name="w" type="integer" value="10000" size="10" label="The window size in bp" + <param name="w" type="integer" value="10000" size="10" label="The window size in bp" help="(-w)"> <validator type="in_range" min="0"/> </param> - <param name="s" type="integer" value="1000" size="10" label="The step size for the windows in bp" + <param name="s" type="integer" value="1000" size="10" label="The step size for the windows in bp" help="(-s)"> <validator type="in_range" min="0"/> </param>