# HG changeset patch # User david-hoover # Date 1347487755 14400 # Node ID 0a895e7c6d8e5c6412c520e7c14a28403ff2e0cd # Parent 40f8cd97979adce387479dd99b44ea072b8664d7 Uploaded diff -r 40f8cd97979a -r 0a895e7c6d8e unified_genotyper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unified_genotyper.xml Wed Sep 12 18:09:15 2012 -0400 @@ -0,0 +1,611 @@ + + SNP and indel caller + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #for $i, $input_bam in enumerate( $reference_source.input_bams ): + -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "UnifiedGenotyper" + --num_threads 4 ##hard coded, for now + --out "${output_vcf}" + --metrics_file "${output_metrics}" + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --genotype_likelihoods_model "${genotype_likelihoods_model}" + --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}" + --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}" + ' + #set $rod_binding_names = dict() + #for $rod_binding in $rod_bind: + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': + #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name + #else + #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #end if + #set $rod_binding_names[$rod_bind_name] = 
$rod_binding_names.get( $rod_bind_name, -1 ) + 1 + -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #end for + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} 
"${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + --p_nonref_model "${analysis_param_type.p_nonref_model}" + --heterozygosity "${analysis_param_type.heterozygosity}" + --pcr_error_rate "${analysis_param_type.pcr_error_rate}" + --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}" + #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES': + --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}" + #end if + --output_mode "${analysis_param_type.output_mode}" + ${analysis_param_type.compute_SLOD} + --min_base_quality_score "${analysis_param_type.min_base_quality_score}" + --max_deletion_fraction 
"${analysis_param_type.max_deletion_fraction}" + --max_alternate_alleles "${analysis_param_type.max_alternate_alleles}" + --min_indel_count_for_genotyping "${analysis_param_type.min_indel_count_for_genotyping}" + --indel_heterozygosity "${analysis_param_type.indel_heterozygosity}" + --indelGapContinuationPenalty "${analysis_param_type.indelGapContinuationPenalty}" + --indelGapOpenPenalty "${analysis_param_type.indelGapOpenPenalty}" + --indelHaplotypeSize "${analysis_param_type.indelHaplotypeSize}" + ${analysis_param_type.doContextDependentGapPenalties} + #if str( $analysis_param_type.annotation ) != "None": + #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','): + --annotation "${annotation}" + #end for + #end if + #for $additional_annotation in $analysis_param_type.additional_annotations: + --annotation "${additional_annotation.additional_annotation_name}" + #end for + #if str( $analysis_param_type.group ) != "None": + #for $group in str( $analysis_param_type.group ).split( ','): + --group "${group}" + #end for + #end if + #if str( $analysis_param_type.exclude_annotations ) != "None": + #for $annotation in str( $analysis_param_type.exclude_annotations.fields.gatk_value ).split( ','): + --excludeAnnotation "${annotation}" + #end for + #end if + ${analysis_param_type.multiallelic} + ' +## #if str( $analysis_param_type.snpEff_rod_bind_type.snpEff_rod_bind_type_selector ) == 'set_snpEff': +## -p '--annotation "SnpEff"' +## -d "--snpEffFile:${analysis_param_type.snpEff_rod_bind_type.snpEff_rod_name},%(file_type)s" "${analysis_param_type.snpEff_rod_bind_type.snpEff_input_rod}" "${analysis_param_type.snpEff_rod_bind_type.snpEff_input_rod.ext}" "input_snpEff_${analysis_param_type.snpEff_rod_bind_type.snpEff_rod_name}" +## #else: +## -p '--excludeAnnotation "SnpEff"' +## #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +A variant caller which unifies the approaches of several disparate callers. Works for single-sample and multi-sample data. The user can choose from several different incorporated calculation models. + +For more information on the GATK Unified Genotyper, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Unified_genotyper>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: UnifiedGenotyper accepts an aligned BAM input file. + + +**Outputs** + +The output is in VCF format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + genotype_likelihoods_model Genotype likelihoods calculation model to employ -- BOTH is the default option, while INDEL is also available for calling indels and SNP is available for calling SNPs only (SNP|INDEL|BOTH) + p_nonref_model Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available. 
(EXACT|GRID_SEARCH) + heterozygosity Heterozygosity value used to compute prior likelihoods for any locus + pcr_error_rate The PCR error rate to be used for computing fragment-based likelihoods + genotyping_mode Specifies how to determine the alternate alleles to use for genotyping: discover them from the data, or genotype only the alleles supplied in the alleles file (DISCOVERY|GENOTYPE_GIVEN_ALLELES) + output_mode Should we output confident genotypes (i.e. including ref calls) or just the variants? (EMIT_VARIANTS_ONLY|EMIT_ALL_CONFIDENT_SITES|EMIT_ALL_SITES) + standard_min_confidence_threshold_for_calling The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called + standard_min_confidence_threshold_for_emitting The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold) + noSLOD If provided, we will not calculate the SLOD + min_base_quality_score Minimum base quality required to consider a base for calling + max_deletion_fraction Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05] + min_indel_count_for_genotyping Minimum number of consensus indels required to trigger genotyping run + indel_heterozygosity Heterozygosity for indel calling + indelGapContinuationPenalty Indel gap continuation penalty + indelGapOpenPenalty Indel gap open penalty + indelHaplotypeSize Indel haplotype size + doContextDependentGapPenalties Vary gap penalties by context + indel_recal_file Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NOT USE + indelDebug Output indel debug info + out File to which variants should be written + annotation One or more specific annotations to apply to variant calls + group One or more classes/groups of annotations to apply to variant calls + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, 
Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + +