# HG changeset patch # User Jim Johnson # Date 1356022915 21600 # Node ID 7533db8dfb5b1a91253d951a9b47b6ce72c92e72 # Parent f2b21dc45241ebcc91c4fd4c214a2fdc5da01d80 Update tool_dependencies to GATK v 2.3 diff -r f2b21dc45241 -r 7533db8dfb5b README --- a/README Thu Nov 15 10:18:55 2012 -0600 +++ b/README Thu Dec 20 11:01:55 2012 -0600 @@ -2,4 +2,18 @@ copied into the tool-data directory. The file tool_data_table_conf.xml must be edited to include references to these two new files. +GATK can be installed via tool_dependencies. +This will install GenomeAnalysisTKLite.jar from ftp://ftp.broadinstitute.org/pub/gsa/GenomeAnalysisTK/ +and create a symbolic link from GenomeAnalysisTK.jar to GenomeAnalysisTKLite.jar +If you have a full licensed copy of GenomeAnalysisTK.jar, change the GenomeAnalysisTK.jar symbolic link +in the installation directory to point to your licensed copy. + +Two environment variables: GATK2_SITE_OPTIONS and GATK2_THREAD_OPTIONS are also set in the tool_dependencies env.sh file. +You can alter the value of those variables for your site. +$ cat tool_dependencies/gatk/2.2/*/gatk2/*/env.sh +GATK2_PATH=/Users/jj/gxt/gxt/tool_dependencies/gatk/2.2/jimmy/gatk2/288cdae6bd9c; export GATK2_PATH +GATK2_SITE_OPTIONS="--phone_home STANDARD"; export GATK2_SITE_OPTIONS +GATK2_THREAD_OPTIONS="--num_threads 4 --num_cpu_threads_per_data_thread 3"; export GATK2_THREAD_OPTIONS + + diff -r f2b21dc45241 -r 7533db8dfb5b base_recalibrator.xml --- a/base_recalibrator.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/base_recalibrator.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + on BAM files - gatk + gatk samtools gatk2_wrapper.py @@ -15,6 +15,7 @@ -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" -T "BaseRecalibrator" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "/data/galaxy/appList/GenomeAnalysisTK-2.0-36-gf5c1c1a/gatk2_key_file" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -90,6 +91,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -417,7 +419,7 @@ - + diff -r f2b21dc45241 -r 7533db8dfb5b depth_of_coverage.xml --- a/depth_of_coverage.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/depth_of_coverage.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + on BAM files - gatk + gatk samtools gatk2_wrapper.py @@ -17,6 +17,7 @@ -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" -T "DepthOfCoverage" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "/data/galaxy/appList/GenomeAnalysisTK-2.0-36-gf5c1c1a/gatk2_key_file" ##ET no phone home @@ -84,6 +85,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -437,6 +439,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b haplotype_caller.xml --- a/haplotype_caller.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/haplotype_caller.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region - gatk + gatk samtools gatk2_wrapper.py @@ -16,6 +16,7 @@ -T "HaplotypeCaller" -o "${output_vcf}" ## \$GATK2_SITE_OPTIONS + ## \$GATK2_THREAD_OPTIONS ##-et "NO_ET" -K "/data/galaxy/appList/GenomeAnalysisTK-2.0-36-gf5c1c1a/gatk2_key_file" ##ET no phone home ##--num_threads 4 ##not supported yet ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -70,6 +71,7 @@ --defaultBaseQualities "${gatk_param_type.default_base_qualities}" --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -417,6 +419,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b indel_realigner.xml --- a/indel_realigner.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/indel_realigner.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - perform local realignment - gatk + gatk samtools gatk2_wrapper.py @@ -16,6 +16,7 @@ -T "IndelRealigner" -o "${output_bam}" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##-et "NO_ET" -K "/data/galaxy/appList/GenomeAnalysisTK-2.0-36-gf5c1c1a/gatk2_key_file" ##ET no phone home ##--num_threads 4 ##hard coded, for now ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -82,6 +83,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -377,6 +379,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b print_reads.xml --- a/print_reads.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/print_reads.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + on BAM files - gatk + gatk samtools gatk2_wrapper.py @@ -22,7 +22,9 @@ #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" #end if - --BQSR "${input_recal}" + #if str($input_recal) != 'None': + --BQSR "${input_recal}" + #end if --disable_bam_indexing ' ##start standard gatk options @@ -70,6 +72,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -356,6 +359,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b realigner_target_creator.xml --- a/realigner_target_creator.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/realigner_target_creator.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + for use in local realignment - gatk + gatk samtools gatk2_wrapper.py @@ -16,6 +16,7 @@ -T "RealignerTargetCreator" -o "${output_interval}" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##-et "NO_ET" -K "/data/galaxy/appList/GenomeAnalysisTK-2.0-36-gf5c1c1a/gatk2_key_file" ##ET no phone home ##--num_threads 4 ##hard coded, for now ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -79,6 +80,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -363,6 +365,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b reduce_reads.xml --- a/reduce_reads.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/reduce_reads.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + in BAM files - gatk + gatk samtools gatk2_wrapper.py @@ -16,6 +16,7 @@ -T "ReduceReads" -o "${output_bam}" ## \$GATK2_SITE_OPTIONS + ## \$GATK2_THREAD_OPTIONS ##-et "NO_ET" -K "/data/galaxy/appList/GenomeAnalysisTK-2.0-36-gf5c1c1a/gatk2_key_file" ##ET no phone home ##--num_threads 4 ##not supported yet ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -72,6 +73,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -363,6 +365,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b tool_dependencies.xml --- a/tool_dependencies.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/tool_dependencies.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,9 +1,9 @@ - + - ftp://ftp.broadinstitute.org/pub/gsa/GenomeAnalysisTK/GenomeAnalysisTKLite-2.2-8-g99996f2.tar.bz2 + ftp://ftp.broadinstitute.org/pub/gsa/GenomeAnalysisTK/GenomeAnalysisTKLite-2.3-4-gb8f1308.tar.bz2 GenomeAnalysisTKLite.jar $INSTALL_DIR @@ -15,7 +15,8 @@ $INSTALL_DIR - "--num_threads 4 --num_cpu_threads_per_data_thread 3 --phone_home STANDARD" + "--phone_home STANDARD" + "--num_threads 4 --num_cpu_threads_per_data_thread 3" diff -r f2b21dc45241 -r 7533db8dfb5b unified_genotyper.xml --- a/unified_genotyper.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/unified_genotyper.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + SNP and indel caller - gatk + gatk samtools gatk2_wrapper.py @@ -20,6 +20,7 @@ --out "${output_vcf}" --metrics_file "${output_metrics}" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout #if $reference_source.reference_source_selector != "history": @@ -85,6 +86,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -419,6 +421,7 @@ + @@ -458,7 +461,7 @@ - + diff -r f2b21dc45241 -r 7533db8dfb5b variant_annotator.xml --- a/variant_annotator.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_annotator.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - gatk + gatk samtools gatk2_wrapper.py @@ -19,6 +19,7 @@ ##--list -T "VariantAnnotator" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -117,6 +118,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -444,6 +446,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variant_apply_recalibration.xml --- a/variant_apply_recalibration.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_apply_recalibration.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - gatk + gatk gatk2_wrapper.py --max_jvm_heap_fraction "1" @@ -13,6 +13,7 @@ -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" -T "ApplyRecalibration" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home #if $reference_source.reference_source_selector != "history": @@ -68,6 +69,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -334,6 +336,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variant_combine.xml --- a/variant_combine.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_combine.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - gatk + gatk gatk2_wrapper.py --max_jvm_heap_fraction "1" @@ -19,6 +19,7 @@ -T "CombineVariants" --out "${output_variants}" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -74,6 +75,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -354,6 +356,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variant_eval.xml --- a/variant_eval.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_eval.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - gatk + gatk gatk2_wrapper.py #from binascii import hexlify @@ -15,6 +15,7 @@ -T "VariantEval" --out "${output_report}" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -82,6 +83,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -410,6 +412,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variant_filtration.xml --- a/variant_filtration.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_filtration.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + on VCF files - gatk + gatk gatk2_wrapper.py #from binascii import hexlify @@ -12,6 +12,7 @@ -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" -T "VariantFiltration" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home -o "${output_vcf}" @@ -78,6 +79,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -363,6 +365,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variant_recalibrator.xml --- a/variant_recalibrator.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_recalibrator.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - gatk + gatk gatk2_wrapper.py --max_jvm_heap_fraction "1" @@ -13,6 +13,7 @@ -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" -T "VariantRecalibrator" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file"##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -86,6 +87,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -589,6 +591,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variant_select.xml --- a/variant_select.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variant_select.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + from VCF files - gatk + gatk gatk2_wrapper.py #from binascii import hexlify @@ -12,6 +12,7 @@ -jar "\$GATK2_PATH/GenomeAnalysisTK.jar" -T "SelectVariants" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##--num_threads 4 ##hard coded, for now ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home -o "${output_vcf}" @@ -138,6 +139,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -408,6 +410,7 @@ + diff -r f2b21dc45241 -r 7533db8dfb5b variants_validate.xml --- a/variants_validate.xml Thu Nov 15 10:18:55 2012 -0600 +++ b/variants_validate.xml Thu Dec 20 11:01:55 2012 -0600 @@ -1,7 +1,7 @@ - + - gatk + gatk gatk2_wrapper.py --max_jvm_heap_fraction "1" @@ -12,6 +12,7 @@ -T "ValidateVariants" \$GATK2_SITE_OPTIONS + \$GATK2_THREAD_OPTIONS ##-et "NO_ET" -K "\$GATK2_BASE/gatk2_key_file" ##ET no phone home ##--num_threads 4 ##hard coded, for now ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout @@ -71,6 +72,7 @@ --validation_strictness "${gatk_param_type.validation_strictness}" --interval_merging "${gatk_param_type.interval_merging}" ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.fix_misencoded_quality_scores} ${gatk_param_type.non_deterministic_random_seed} ' #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): @@ -334,6 +336,7 @@ +