# HG changeset patch
# User elixir-it
# Date 1541761445 18000
# Node ID 3d969c748317f5454ce68dd507024a78dc71b413
Uploaded
diff -r 000000000000 -r 3d969c748317 bed_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bed_macros.xml Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 3d969c748317 covacs_VariantRecalibrator.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/covacs_VariantRecalibrator.xml Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,223 @@
+
+ GATK VariantRecalibrator wrapper Version = 3.8
+
+ bed_macros.xml
+ vcf_macros.xml
+
+
+ gatk
+
+
+ $log
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: warningmark
+
+**IMPORTANT**: before the wrapper can be used, the admin user has to download GATK version 3.8 from the Broad Institute site https://software.broadinstitute.org/gatk/download/archive and then move it into the conda_prefix folder; the path of the conda_prefix is written in the galaxy.ini (or .yml) file
+
+ **More information** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_variantrecalibration_VariantRecalibrator.php
+
+-----
+
+**Implemented options** VariantRecalibrator:
+
+**-L** : One or more genomic intervals over which to operate(file.bed)
+
+**-ip** : Amount of padding (in bp) to add to each interval
+
+**--resource:NAME,known=true/false,training=true/false,truth=true/false,prior=float $file** : A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)
+
+**-mode** : Recalibration mode to employ (SNP|INDEL)
+
+**-an** : annotations which should be used for calculations
+
+**-tranche** : The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)
+
+**Additional options in INDEL mode**
+
+**--minNumBadVariants** : Minimum number of bad variants
+
+**--maxGaussians** : Max number of Gaussians for the positive model
+
+**-mNG** : Max number of Gaussians for the negative model
+
+**OUTPUTS**
+
+-recalFile
+
+-tranchesFile
+
+-----
+
+.. class:: infomark
+
+**Recommended CoVaCS command**
+
+**-ip** 100
+
+**-R** genome.fa
+
+**-input** VCF
+
+**-resource**:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap.vcf
+
+**-resource**:omni,known=false,training=true,truth=true,prior=12.0 omni.vcf
+
+**-resource**:1000G,known=false,training=false,truth=false,prior=8.0 1000G.vcf
+
+**-resource**:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp.vcf
+
+**-mode** SNP
+
+**-an** DP **-an** QD **-an** MQ **-an** MQRankSum **-an** ReadPosRankSum **-an** FS
+
+**-tranche** 100.0 **-tranche** 99.5 **-tranche** 99.0 **-tranche** 98.5 **-tranche** 90.0
+
+
+
+ 10.1186/s12864-018-4508-1
+
+
+
diff -r 000000000000 -r 3d969c748317 mv_untar_gatk.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mv_untar_gatk.sh Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,9 @@
+#!/bin/bash
+#if the .jar file is not present in the conda_prefix the script search the tar.gz in the conda_prefix of the vm
+#and untar the archive
+if [[ ! -f $CONDA_PREFIX/../../GenomeAnalysisTK.jar ]] ; then
+ tar -zxvf $CONDA_PREFIX/../../GenomeAnalysis*.tar.gz -C $CONDA_PREFIX/../../
+
+else
+ echo GATK is present
+fi
diff -r 000000000000 -r 3d969c748317 tool-data/covacs_bed.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_bed.loc.sample Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables the covacs
+#tools to use BED files stored on the server. You will need
+#to create these data files and then create a covacs_bed.loc file
+#similar to this one (store it in this directory) that points to
+#the locations in which those files are stored. The covacs_bed.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#
+#
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg19 hg19 hg19-padded /export/BED/S07084713_Padded.bed
+hgbed hg19 hg19-bed-test /export/BED/chr22.bed
diff -r 000000000000 -r 3d969c748317 tool-data/covacs_gatk_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_gatk_indexes.loc.sample Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,36 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of reference files for all covacs wrappers that need a gatk reference. You will need
+#to create these data files and then create a covacs_gatk_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The covacs_gatk_indexes.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#
+#
+#So, for example, if you had the phiX index stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the covacs_gatk_indexes.loc entry would look like this:
+#
+#phiX174 phiX phiX Pretty /depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain phiX.dict, phiX.fa.fai files.
+#
+#
+#Your covacs_gatk_indexes.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg38 hg38 hg38_GDC /export/gatkhg38pl/GRCh38.d1.vd1.fa
+hg19 hg19 hg19 /export/gatk_hg19_index_bundle/ucsc.hg19.fasta
diff -r 000000000000 -r 3d969c748317 tool-data/covacs_vcf.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_vcf.loc.sample Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables the covacs
+#tools to use VCF resource files stored on the server. You will need
+#to create these data files and then create a covacs_vcf.loc file
+#similar to this one (store it in this directory) that points to
+#the locations in which those files are stored. The covacs_vcf.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#
+#
+#So, for example, if you had a vcf file stored in
+#/export/resource/,
+#then the covacs_vcf.loc entry would look like this:
+#
+#hapmap hapmap /export/resource/hapmap.vcf
+#
+#and your /export/resource directory
+#would contain hapmap.vcf.
+#
+#
+#Your covacs_vcf.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files.
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter.
+#
+hapmap hapmap /export/resources/hapmap.vcf
+1000G 1000G /export/resources/1000G.vcf
+
diff -r 000000000000 -r 3d969c748317 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Nov 09 06:04:05 2018 -0500
@@ -0,0 +1,17 @@
+
+
+