# HG changeset patch
# User bgruening
# Date 1390046426 18000
# Node ID e67da4f2c9bfbdfdf5b7d115622f3ecce13428cc
# Parent b7ef18eaa9c988f034ea62fb2fb04b7825d3e6db
Uploaded
diff -r b7ef18eaa9c9 -r e67da4f2c9bf base_recalibrator.xml
--- a/base_recalibrator.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/base_recalibrator.xml Sat Jan 18 07:00:26 2014 -0500
@@ -17,9 +17,8 @@
\$GATK2_SITE_OPTIONS
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
- --num_cpu_threads_per_data_thread 8
+ --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-8}
- @THREADS@
## we set non standards at every run and the user can choose which ones are preferred
## in our select box both standard options (ContextCovariate, CycleCovariate) are selected by default
--no_standard_covs
@@ -111,12 +110,7 @@
-
- The input covariates table file which enables on-the-fly base quality score recalibration.
- Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
- Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-
-
+
diff -r b7ef18eaa9c9 -r e67da4f2c9bf gatk2_macros.xml
--- a/gatk2_macros.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/gatk2_macros.xml Sat Jan 18 07:00:26 2014 -0500
@@ -1,17 +1,23 @@
-
-
- samtools
- GATK2_PATH
- GATK2_SITE_OPTIONS
-
-
-
- --num_threads \${GALAXY_SLOTS:-4}
-
-
- java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
-
+
+
+ gatk2
+ samtools
+ GATK2_PATH
+ GATK2_SITE_OPTIONS
+
+
+
+ --num_threads \${GALAXY_SLOTS:-4}
+
+
+ java -jar "\$GATK2_PATH/GenomeAnalysisTK.jar"
+
+
+ #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
+ -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
+ #end if
+
##start standard gatk options
#if $gatk_param_type.gatk_param_type_selector == "advanced":
@@ -311,6 +317,21 @@
+
+
+
+
+
+
+
+
+
+
+ ^\w+$
+
+
+
+ ------
**Citation**
diff -r b7ef18eaa9c9 -r e67da4f2c9bf gatk2_picard_index.loc.sample
--- a/gatk2_picard_index.loc.sample Mon Dec 02 10:37:45 2013 -0500
+++ b/gatk2_picard_index.loc.sample Sat Jan 18 07:00:26 2014 -0500
@@ -24,7 +24,3 @@
#the dict file does not have the .fa extension although the
#path list in the loc file does include it.
#
-hg18 hg18 hg18 /data/galaxy/ext-tool-data/picard/hg18.fa
-hg19 hg19 hg19 /data/galaxy/ext-tool-data/picard/hg19.fa
-mm8 mm8 mm8 /data/galaxy/ext-tool-data/picard/mm8.fa
-mm9 mm9 mm9 /data/galaxy/ext-tool-data/picard/mm9.fa
diff -r b7ef18eaa9c9 -r e67da4f2c9bf haplotype_caller.xml
--- a/haplotype_caller.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/haplotype_caller.xml Sat Jan 18 07:00:26 2014 -0500
@@ -7,10 +7,12 @@
gatk2_wrapper.py
--stdout "${output_log}"
- -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
- #if str( $reference_source.input_bam.metadata.bam_index ) != "None":
- -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
- #end if
+ #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+ -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
+ #if str( $input_bam.input_bam.metadata.bam_index ) != "None":
+ -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
+ #end if
+ #end for
-p '
@JAR_PATH@
-T "HaplotypeCaller"
@@ -18,7 +20,7 @@
\$GATK2_SITE_OPTIONS
- @THREADS@
+ --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-4}
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
@@ -27,14 +29,12 @@
--BQSR "${input_recal}"
#end if
'
+ @DBSNP_OPTIONS@
#include source=$standard_gatk_options#
##start analysis specific options
#if $analysis_param_type.analysis_param_type_selector == "advanced":
-p '
- #if $analysis_param_type.p_nonref_model.__str__ != "None" and len($analysis_param_type.p_nonref_model.__str__) > 0:
- --p_nonref_model $analysis_param_type.p_nonref_model
- #end if
#if $analysis_param_type.heterozygosity.__str__.strip() != '':
--heterozygosity $analysis_param_type.heterozygosity
#end if
@@ -42,8 +42,8 @@
#if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
--alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
#end if
- #if $analysis_param_type.output_mode.__str__ != "None" and len($analysis_param_type.output_mode.__str__) > 0:
- --output_mode $analysis_param_type.output_mode
+ #if not $analysis_param_type.emitRefConfidence is None:
+ --emitRefConfidence $analysis_param_type.emitRefConfidence
#end if
## files
@@ -53,9 +53,6 @@
#if str($analysis_param_type.comp) != 'None':
--comp "$analysis_param_type.comp"
#end if
- #if str($analysis_param_type.dbsnp) != 'None':
- --dbsnp "$analysis_param_type.dbsnp"
- #end if
##
#if str( $analysis_param_type.annotation ) != "None":
#for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
@@ -80,9 +77,6 @@
#if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '':
--contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter
#end if
- #if $analysis_param_type.downsampleRegion.__str__.strip() != '':
- --downsampleRegion $analysis_param_type.downsampleRegion
- #end if
#if $analysis_param_type.minPruning.__str__.strip() != '':
--minPruning $analysis_param_type.minPruning
#end if
@@ -99,9 +93,7 @@
--max_alternate_alleles $analysis_param_type.max_alternate_alleles
#end if
## mode selections
- #if $analysis_param_type.genotyping_mode.__str__ != "None" and len($analysis_param_type.genotyping_mode.__str__) > 0:
- --genotyping_mode $analysis_param_type.genotyping_mode
- #end if
+
#if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0:
--pair_hmm_implementation $analysis_param_type.pair_hmm_implementation
#end if
@@ -121,35 +113,31 @@
#end if
-
- The input covariates table file which enables on-the-fly base quality score recalibration.
- Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
- Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-
-
+
-
-
-
-
+
+
+
+
+
+
-
+
-
-
-
-
-
-
+
+
+
+
+
@@ -211,9 +199,7 @@
-
-
@@ -234,10 +220,11 @@
-
-
-
-
+
+
+
+
+
@@ -252,13 +239,6 @@
-
-
-
-
-
-
-
@@ -323,14 +303,12 @@
contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove
dbsnp dbSNP file
debug If specified, print out very verbose debug information about each triggering active region
- downsampleRegion coverage, per-sample, to downsample each active region to
excludeAnnotation One or more specific annotations to exclude
genotyping_mode Specifies how to determine the alternate alleles to use for genotyping
graphOutput File to which debug assembly graph information should be written
group One or more classes/groups of annotations to apply to variant calls
heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
minPruning The minimum allowed pruning factor in assembly graph. Paths with less than or equal supporting kmers are pruned from the graph
- output_mode Specifies which type of calls we should output
pair_hmm_implementation The PairHMM implementation to use for genotype likelihood calculations
stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called
stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)
@@ -339,9 +317,6 @@
gcpHMM Flat gap continuation penalty for use in the Pair HMM
genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping
max_alternate_alleles Maximum number of alternate alleles to genotype
- p_nonref_model Non-reference probability calculation model to employ
-
-------
@CITATION_SECTION@
diff -r b7ef18eaa9c9 -r e67da4f2c9bf indel_realigner.xml
--- a/indel_realigner.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/indel_realigner.xml Sat Jan 18 07:00:26 2014 -0500
@@ -21,8 +21,6 @@
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
--num_cpu_threads_per_data_thread 1
- @THREADS@
-
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
#end if
diff -r b7ef18eaa9c9 -r e67da4f2c9bf print_reads.xml
--- a/print_reads.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/print_reads.xml Sat Jan 18 07:00:26 2014 -0500
@@ -18,9 +18,8 @@
\$GATK2_SITE_OPTIONS
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
- --num_cpu_threads_per_data_thread 8
+ --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-6}
- @THREADS@
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
diff -r b7ef18eaa9c9 -r e67da4f2c9bf readme.rst
--- a/readme.rst Mon Dec 02 10:37:45 2013 -0500
+++ b/readme.rst Sat Jan 18 07:00:26 2014 -0500
@@ -13,7 +13,6 @@
http://www.broadinstitute.org/gatk
http://www.broadinstitute.org/gatk/about/citing-gatk
-
GATK is Free for academics, and fee for commercial use. Please study the GATK licensing website:
http://www.broadinstitute.org/gatk/about/#licensing
@@ -23,35 +22,42 @@
The recommended installation is by means of the toolshed_.
-.. _toolshed: http://toolshed.g2.bx.psu.edu/view/bjoern-gruening/augustus
+.. _toolshed: http://toolshed.g2.bx.psu.edu/view/iuc/gatk2
-Galaxy should be able to automatically install samtools dependencies automatically
+Galaxy should be able to install samtools dependencies automatically
for you. GATK2, and its new licence model, does not allow us to distribute the GATK binaries.
-As a consequence you need to install GATK2 by your own, please see the GATK website for more informations:
+As a consequence, you need to install GATK2 on your own; please see the GATK website for more information:
http://www.broadinstitute.org/gatk/download
-Once you have installed GATK2 you need to edit the env.sh file that is installed with these wrappers.
-You will find this env.sh file under:
+Once you have installed GATK2, you need to edit the env.sh files that are installed together with the wrappers.
+You must edit the GATK2_PATH environment variable in the file:
-/gatk2//iuc//env.sh
+<tool_dependency_dir>/environment_settings/GATK2_PATH/iuc/gatk2/<hash_string>/env.sh
+
+to point to the folder where you have installed GATK2.
+
+Optionally, you may also want to edit the GATK2_SITE_OPTIONS environment variable in the file:
-You should edit the GATK2_PATH environment variable to point to the folder you have installed GATK2
-and if you want to deactivate the 'call home feature' from GATK you can set
+<tool_dependency_dir>/environment_settings/GATK2_SITE_OPTIONS/iuc/gatk2/<hash_string>/env.sh
-GATK2_SITE_OPTIONS='-et "NO_ET" -K "/data/gatk2_key_file"'
+to deactivate the 'call home feature' of GATK with something like:
-GATK2_SITE_OPTIONS can be used to insert specific options into every GATK2 wrapper
-during runtime, without changing the actuall wrapper.
+GATK2_SITE_OPTIONS='-et NO_ET -K /data/gatk2_key_file'
-Read more about the "Phone Home" problem under:
+GATK2_SITE_OPTIONS can also be used to insert other specific options into every GATK2 wrapper
+at runtime, without changing the actual wrapper.
+
+Read more about the "Phone Home" problem at:
http://www.broadinstitute.org/gatk/guide/article?id=1250
+Optionally, you may also want to add some commands to be executed before GATK (e.g. to load modules) to the file:
+
+<tool_dependency_dir>/gatk2/default/env.sh
Finally, you should fill in additional information about your genomes and
annotations in the gatk2_picard_index.loc and gatk2_annotations.txt.
-You can find them under ./tool-data/.
-
+You can find them in the tool-data/ Galaxy directory.
History
@@ -80,5 +86,3 @@
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-
-
diff -r b7ef18eaa9c9 -r e67da4f2c9bf reduce_reads.xml
--- a/reduce_reads.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/reduce_reads.xml Sat Jan 18 07:00:26 2014 -0500
@@ -21,8 +21,6 @@
## according to http://www.broadinstitute.org/gatk/guide/article?id=1975
--num_cpu_threads_per_data_thread 1
- @THREADS@
-
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
#end if
@@ -68,12 +66,7 @@
#end if
-
- The input covariates table file which enables on-the-fly base quality score recalibration.
- Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
- Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
-
-
+
@@ -228,8 +221,6 @@
-noclip_ad / --dont_hardclip_adaptor_sequences ( boolean with default value false )
Do not hard clip adaptor sequences. Note: You don't have to turn this on for reads that are not mate paired. The program will behave correctly in those cases.
-------
-
@CITATION_SECTION@
diff -r b7ef18eaa9c9 -r e67da4f2c9bf unified_genotyper.xml
--- a/unified_genotyper.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/unified_genotyper.xml Sat Jan 18 07:00:26 2014 -0500
@@ -31,22 +31,12 @@
--standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
--standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
'
- #set $rod_binding_names = dict()
- #for $rod_binding in $rod_bind:
- #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
- #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
- #else
- #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
- #end if
- #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
- -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
- #end for
-
+ @DBSNP_OPTIONS@
+
#include source=$standard_gatk_options#
##start analysis specific options
#if $analysis_param_type.analysis_param_type_selector == "advanced":
-p '
- --p_nonref_model "${analysis_param_type.p_nonref_model}"
--heterozygosity "${analysis_param_type.heterozygosity}"
--pcr_error_rate "${analysis_param_type.pcr_error_rate}"
--genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
@@ -120,30 +110,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -158,10 +125,6 @@
-
-
-
-
@@ -187,13 +150,11 @@
-
- Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.
-
+
+
-
- Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10.
-
+
+
@@ -252,14 +213,14 @@
-
-
+
+
+
-
@@ -310,7 +271,6 @@
**Settings**::
genotype_likelihoods_model Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH)
- p_nonref_model Non-reference probability calculation model to employ -- EXACT_GENERAL_PLOIDY is the default option, while EXACT_REFERENCE is also available. (EXACT_INDEPENDENT,EXACT_REFERENCE,EXACT_ORIGINAL,EXACT_GENERAL_PLOIDY)
heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
pcr_error_rate The PCR error rate to be used for computing fragment-based likelihoods
genotyping_mode Should we output confident genotypes (i.e. including ref calls) or just the variants? (DISCOVERY|GENOTYPE_GIVEN_ALLELES)
diff -r b7ef18eaa9c9 -r e67da4f2c9bf variant_annotator.xml
--- a/variant_annotator.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/variant_annotator.xml Sat Jan 18 07:00:26 2014 -0500
@@ -51,10 +51,7 @@
-d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
#end for
- #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
- -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
- #end if
-
+ @DBSNP_OPTIONS@
#for $rod_binding in $resource_rod_bind:
-d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}"
@@ -136,20 +133,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -210,6 +194,7 @@
+
diff -r b7ef18eaa9c9 -r e67da4f2c9bf variant_eval.xml
--- a/variant_eval.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/variant_eval.xml Sat Jan 18 07:00:26 2014 -0500
@@ -32,9 +32,9 @@
#end if
#end for
- #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
+ #if $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector == 'set_dbsnp'
-d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
- #if str( $dbsnp_rod_bind_type.dbsnp_known_names ):
+ #if $dbsnp_rod_bind_type.dbsnp_known_names
-p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
#end if
#end if
@@ -122,24 +122,25 @@
-
+
-
-
+
+ ^\w+$
+
+
-
-
+
-
-
-
+
-
-
+
+ ^\w+$
+
+
@@ -228,6 +229,7 @@
+
diff -r b7ef18eaa9c9 -r e67da4f2c9bf variant_filtration.xml
--- a/variant_filtration.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/variant_filtration.xml Sat Jan 18 07:00:26 2014 -0500
@@ -15,8 +15,6 @@
-T "VariantFiltration"
\$GATK2_SITE_OPTIONS
- @THREADS@
-
-o "${output_vcf}"
#if $reference_source.reference_source_selector != "history":
@@ -83,7 +81,7 @@
-
+
diff -r b7ef18eaa9c9 -r e67da4f2c9bf variant_validate.xml
--- a/variant_validate.xml Mon Dec 02 10:37:45 2013 -0500
+++ b/variant_validate.xml Sat Jan 18 07:00:26 2014 -0500
@@ -14,18 +14,13 @@
\$GATK2_SITE_OPTIONS
- @THREADS@
-
#if $reference_source.reference_source_selector != "history":
-R "${reference_source.ref_file.fields.path}"
#end if
${warn_on_errors}
${do_not_validate_filtered_records}
'
-
- #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
- -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
- #end if
+ @DBSNP_OPTIONS@
#include source=$standard_gatk_options#
@@ -47,20 +42,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -78,6 +60,7 @@
+