Mercurial > repos > dave > gatk4_mutect2
view gatk4_Mutect2.xml @ 1:fcf1bc63ed06 draft default tip
Uploaded
author | dave |
---|---|
date | Thu, 26 Sep 2019 09:28:27 -0400 |
parents | c27f4eb641bf |
children |
line wrap: on
line source
<tool id="gatk4_mutect2" name="GATK4 Mutect2" version="@WRAPPER_VERSION@0" profile="18.05">
    <description>- Call somatic SNVs and indels via local assembly of haplotypes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <!-- Cheetah command template. Shell set-up steps (each terminated by a shell AND) come first,
         then a single GATK Mutect2 invocation whose arguments are assembled from the
         common / optional / advanced / output conditionals declared in the inputs section. -->
    <command detect_errors="exit_code"><![CDATA[
        #include source=$set_sections#
        #include source=$pre_gatk_excl_ints_chth#
        #include source=$bam_index_pre_chth#
        #include source=$pre_gatk_ints_chth#

        ## Link the reference under a fixed name and build its .fai and .dict companions.
        ln -s '$reference_source.reference_sequence' reference.fa &&
        samtools faidx reference.fa &&
        gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&

        ## STAGE OPTIONAL VCF RESOURCES ##
        ## These are shell commands, so they must run BEFORE the GATK call starts; emitting them
        ## after "Mutect2" would splice "ln -s ... &&" into the middle of the argument list.
        ## bgzip-compressed inputs get a .vcf.gz name plus a tabix index, anything else is linked as .vcf.
        #if str($optional.optional_parameters) == 'yes'
            #if $optional.population_callset
                #if $optional.population_callset.is_of_type("vcf_bgzip")
                    ln -s '$optional.population_callset' population_callset.vcf.gz &&
                    tabix population_callset.vcf.gz &&
                #else
                    ln -s '$optional.population_callset' population_callset.vcf &&
                #end if
            #end if
            #if $optional.panel_of_normals
                #if $optional.panel_of_normals.is_of_type("vcf_bgzip")
                    ln -s '$optional.panel_of_normals' panel_of_normals.vcf.gz &&
                    tabix panel_of_normals.vcf.gz &&
                #else
                    ln -s '$optional.panel_of_normals' panel_of_normals.vcf &&
                #end if
            #end if
            #if $optional.germline_resource
                #if $optional.germline_resource.is_of_type("vcf_bgzip")
                    ln -s '$optional.germline_resource' germline_resource.vcf.gz &&
                    tabix germline_resource.vcf.gz &&
                #else
                    ln -s '$optional.germline_resource' germline_resource.vcf &&
                #end if
            #end if
            #if $optional.alleles
                #if $optional.alleles.is_of_type("vcf_bgzip")
                    ln -s '$optional.alleles' alleles.vcf.gz &&
                    tabix alleles.vcf.gz &&
                #else
                    ln -s '$optional.alleles' alleles.vcf &&
                #end if
            #end if
        #end if
        ## END STAGE OPTIONAL VCF RESOURCES ##

        @CMD_BEGIN@ Mutect2
        #include source=$gatk_bam_input#

        ## COMMON PARAMETERS ##
        ## Parameters whose form default is empty (free text, optional datasets) are only emitted
        ## when they hold a value, so an untouched field never yields an argument like --read-filter="".
        #if str($common.common_parameters) == 'yes'
            #if $common.read_filter
                --read-filter="$common.read_filter"
            #end if
            --seconds-between-progress-updates="$common.seconds_between_progress_updates"
            --verbosity="$common.verbosity"
            --read-validation-stringency="$common.read_validation_stringency"
            --interval-set-rule="$common.interval_set_rule"
            #if $common.gatk_config_file
                --gatk-config-file="$common.gatk_config_file"
            #end if
            #if $common.disable_read_filter
                --disable-read-filter="$common.disable_read_filter"
            #end if
            $common.QUIET
            $common.use_jdk_deflater
            $common.use_jdk_inflater
            $common.lenient
            $common.disable_tool_default_read_filters
            $common.add_output_sam_program_record
            $common.add_output_vcf_command_line
        #end if
        ## END COMMON PARAMETERS ##

        ## OPTIONAL PARAMETERS ##
        #if str($optional.optional_parameters) == 'yes'
            ## The file names below are the links created in the staging section above.
            #if $optional.panel_of_normals
                #if $optional.panel_of_normals.is_of_type("vcf_bgzip")
                    --panel-of-normals panel_of_normals.vcf.gz
                #else
                    --panel-of-normals panel_of_normals.vcf
                #end if
            #end if
            #if $optional.pedigree
                --pedigree="$optional.pedigree"
            #end if
            #if $optional.alleles
                #if $optional.alleles.is_of_type("vcf_bgzip")
                    --alleles alleles.vcf.gz
                #else
                    --alleles alleles.vcf
                #end if
            #end if
            #if $optional.germline_resource
                #if $optional.germline_resource.is_of_type("vcf_bgzip")
                    --germline-resource germline_resource.vcf.gz
                #else
                    --germline-resource germline_resource.vcf
                #end if
            #end if
            #if $optional.population_callset
                #if $optional.population_callset.is_of_type("vcf_bgzip")
                    --population-callset population_callset.vcf.gz
                #else
                    --population-callset population_callset.vcf
                #end if
            #end if
            #if $optional.arguments_file
                --arguments_file="$optional.arguments_file"
            #end if
            --base-quality-score-threshold="$optional.base_quality_score_threshold"
            --cloud-index-prefetch-buffer="$optional.cloud_index_prefetch_buffer"
            --cloud-prefetch-buffer="$optional.cloud_prefetch_buffer"
            #if $optional.annotation
                --annotation="$optional.annotation"
            #end if
            #if $optional.annotation_group
                --annotation-group="$optional.annotation_group"
            #end if
            #if $optional.annotations_to_exclude
                --annotations-to-exclude="$optional.annotations_to_exclude"
            #end if
            --af-of-alleles-not-in-resource="$optional.af_of_alleles_not_in_resource"
            --contamination-fraction-to-filter="$optional.contamination_fraction_to_filter"
            --downsampling-stride="$optional.downsampling_stride"
            #if $optional.founder_id
                --founder-id="$optional.founder_id"
            #end if
            --gcs-max-retries="$optional.gcs_max_retries"
            --genotyping-mode="$optional.genotyping_mode"
            --heterozygosity="$optional.heterozygosity"
            --heterozygosity-stdev="$optional.heterozygosity_stdev"
            --indel-heterozygosity="$optional.indel_heterozygosity"
            --initial-tumor-lod="$optional.initial_tumor_lod"
            --interval-merging-rule="$optional.interval_merging_rule"
            --max-population-af="$optional.max_population_af"
            --max-reads-per-alignment-start="$optional.max_reads_per_alignment_start"
            --min-base-quality-score="$optional.min_base_quality_score"
            --native-pair-hmm-threads="$optional.native_pair_hmm_threads"
            --normal-lod="$optional.normal_lod"
            #if $optional.normal_sample
                --normal-sample="$optional.normal_sample"
            #end if
            --num-reference-samples-if-no-call="$optional.num_reference_samples_if_no_call"
            --output-mode="$optional.output_mode"
            --sample-ploidy="$optional.sample_ploidy"
            --standard-min-confidence-threshold-for-calling="$optional.standard_min_confidence_threshold_for_calling"
            --tumor-lod-to-emit="$optional.tumor_lod_to_emit"
            $optional.annotate_with_num_discovered_alleles
            $optional.disable_bam_index_caching
            $optional.disable_sequence_dictionary_validation
            $optional.genotype_germline_sites
            $optional.genotype_pon_sites
            $optional.native_pair_hmm_use_double_precision
            $optional.sites_only_vcf_output
            $optional.use_new_qual_calculator
        #end if
        ## END OPTIONAL PARAMETERS ##

        ## ADVANCED PARAMETERS ##
        #if str($advanced.advanced_parameters) == 'yes'
            --active-probability-threshold="$advanced.active_probability_threshold"
            --assembly-region-padding="$advanced.assembly_region_padding"
            --bam-writer-type="$advanced.bam_writer_type"
            #if $advanced.contamination_fraction_per_sample_file
                --contamination-fraction-per-sample-file="$advanced.contamination_fraction_per_sample_file"
            #end if
            #if $advanced.input_prior
                --input-prior="$advanced.input_prior"
            #end if
            ## kmer_size has no form default, so skip the flag and let GATK use its own default.
            #if str($advanced.kmer_size) != ''
                --kmer-size="$advanced.kmer_size"
            #end if
            --max-alternate-alleles="$advanced.max_alternate_alleles"
            --max-assembly-region-size="$advanced.max_assembly_region_size"
            --max-genotype-count="$advanced.max_genotype_count"
            --max-mnp-distance="$advanced.max_mnp_distance"
            --max-num-haplotypes-in-population="$advanced.max_num_haplotypes_in_population"
            --max-prob-propagation-distance="$advanced.max_prob_propagation_distance"
            --max-suspicious-reads-per-alignment-start="$advanced.max_suspicious_reads_per_alignment_start"
            --min-assembly-region-size="$advanced.min_assembly_region_size"
            --min-dangling-branch-length="$advanced.min_dangling_branch_length"
            --min-pruning="$advanced.min_pruning"
            --num-pruning-samples="$advanced.num_pruning_samples"
            --pair-hmm-gap-continuation-penalty="$advanced.pair_hmm_gap_continuation_penalty"
            --pair-hmm-implementation="$advanced.pair_hmm_implementation"
            --pcr-indel-model="$advanced.pcr_indel_model"
            --phred-scaled-global-read-mismapping-rate="$advanced.phred_scaled_global_read_mismapping_rate"
            --smith-waterman="$advanced.smith_waterman"
            $advanced.all_site_pls
            $advanced.allow_non_unique_kmers_in_ref
            $advanced.consensus
            $advanced.debug
            $advanced.disable_tool_default_annotations
            $advanced.do_not_run_physical_phasing
            $advanced.dont_increase_kmer_sizes_for_cycles
            $advanced.dont_trim_active_regions
            $advanced.dont_use_soft_clipped_bases
            $advanced.enable_all_annotations
            $advanced.genotype_filtered_alleles
            $advanced.use_filtered_reads_for_annotations
        #end if
        ## END ADVANCED PARAMETERS ##

        ## ADDITIONAL OUTPUT PARAMETERS ##
        ## Controlled by the "output" conditional (not "advanced"); each extra file is requested
        ## only when its own checkbox is ticked.
        #if str($output.output_parameters) == 'yes'
            #if $output.activity_profile_out_sel
                --activity-profile-out="$activity_profile_out"
            #end if
            #if $output.assembly_region_out_sel
                --assembly-region-out="$assembly_region_out"
            #end if
            #if $output.bam_output_sel
                --bam-output="$bam_output"
            #end if
            #if $output.graph_output_sel
                --graph-output="$graph_output"
            #end if
        #end if
        ## END ADDITIONAL OUTPUT PARAMETERS ##

        #include source=$gatk_excl_ints_chth#
        #include source=$gatk_ints_chth#
        #include source=$vcf_output_opts#
        #include source=$ref_opts#
        #include source=$gatk_seqdict#

        #if $tumor_sample
            --tumor-sample="$tumor_sample"
        #end if
    ]]></command>
    <inputs>
        <expand macro="gatk_bam_req_params"/>
        <expand macro="gzip_vcf_params"/>
        <expand
macro="ref_sel"/>
        <!-- Sample name of the tumor; the command only passes it to GATK when non-empty. -->
        <param name="tumor_sample" argument="--tumor-sample" type="text" optional="false" value="" label="Tumor Sample" help="BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument."/>

        <!-- Engine-level options; read by the command template as $common.* when "yes" is selected. -->
        <conditional name="common">
            <param name="common_parameters" type="select" label="Common parameters">
                <option value="no">Use internal defaults</option>
                <option value="yes">Specify parameters</option>
            </param>
            <when value="yes">
                <expand macro="gatk_excl_ints"/>
                <expand macro="seq_dict_sel"/>
                <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/>
                <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/>
                <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/>
                <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters" type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/>
                <!-- NOTE(review): gatk_config_file (here), arguments_file and pedigree (below) still declare an
                     empty format; their expected datatypes cannot be determined from this file. Confirm and set them. -->
                <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="" label="Gatk Config File" help="A configuration file to use with the GATK."/>
                <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs">
                    <option selected="true" value="UNION">UNION</option>
                    <option selected="false" value="INTERSECTION">INTERSECTION</option>
                </param>
                <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/>
                <param name="QUIET" argument="--QUIET" type="boolean" truevalue="--QUIET" falsevalue="" optional="true" checked="false" label="Quiet" help="Whether to suppress job-summary info on System.err."/>
                <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/>
                <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.">
                    <option selected="false" value="STRICT">STRICT</option>
                    <option selected="false" value="LENIENT">LENIENT</option>
                    <option selected="true" value="SILENT">SILENT</option>
                </param>
                <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/>
                <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/>
                <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/>
                <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging.">
                    <option selected="false" value="ERROR">ERROR</option>
                    <option selected="false" value="WARNING">WARNING</option>
                    <option selected="true" value="INFO">INFO</option>
                    <option selected="false" value="DEBUG">DEBUG</option>
                </param>
            </when>
            <when value="no" />
        </conditional>

        <!-- Mutect2 options; read by the command template as $optional.* when "yes" is selected.
             The four VCF resources (alleles, germline_resource, panel_of_normals, population_callset)
             are restricted to vcf / vcf_bgzip because the command template links them as .vcf or
             .vcf.gz depending on exactly that distinction. -->
        <conditional name="optional">
            <param name="optional_parameters" type="select" label="Optional parameters">
                <option value="no">Use internal defaults</option>
                <option value="yes">Specify parameters</option>
            </param>
            <when value="yes">
                <expand macro="gatk_ints"/>
                <param name="af_of_alleles_not_in_resource" argument="--af-of-alleles-not-in-resource" type="float" optional="true" value="-1.0" label="Af Of Alleles Not In Resource" help="Population allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for a derivation of the default value."/>
                <param name="alleles" argument="--alleles" type="data" optional="true" format="vcf,vcf_bgzip" label="Alleles" help="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/>
                <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/>
                <param name="annotation" argument="--annotation" type="text" optional="true" value="" label="Annotation" help="One or more specific annotations to add to variant calls"/>
                <param name="annotation_group" argument="--annotation-group" type="text" optional="true" value="" label="Annotation Group" help="One or more groups of annotations to apply to variant calls"/>
                <param name="annotations_to_exclude" argument="--annotations-to-exclude" type="text" optional="true" value="" label="Annotations To Exclude" help="One or more specific annotations to exclude from variant calls"/>
                <param name="arguments_file" argument="--arguments_file" type="data" optional="true" format="" label="Arguments_File" help="read one or more arguments files and add them to the command line"/>
                <param name="base_quality_score_threshold" argument="--base-quality-score-threshold" type="integer" optional="true" value="18" label="Base Quality Score Threshold" help="Base qualities below this threshold will be reduced to the minimum (6)"/>
                <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset."/>
                <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/>
                <param name="contamination_fraction_to_filter" argument="--contamination-fraction-to-filter" type="float" optional="true" value="0.0" label="Contamination Fraction To Filter" help="Fraction of contamination in sequencing data (for all samples) to aggressively remove"/>
                <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don&apos;t cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified."/>
                <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!"/>
                <param name="downsampling_stride" argument="--downsampling-stride" type="integer" optional="true" value="1" label="Downsampling Stride" help="Downsample a pool of reads starting within a range of one or more bases."/>
                <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &quot;founders&quot;"/>
                <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/>
                <param name="genotype_germline_sites" argument="--genotype-germline-sites" type="boolean" truevalue="--genotype-germline-sites" falsevalue="" optional="true" checked="false" label="Genotype Germline Sites" help="(EXPERIMENTAL) Call all apparent germline site even though they will ultimately be filtered."/>
                <param name="genotype_pon_sites" argument="--genotype-pon-sites" type="boolean" truevalue="--genotype-pon-sites" falsevalue="" optional="true" checked="false" label="Genotype Pon Sites" help="Call sites in the PoN even though they will ultimately be filtered."/>
                <param name="genotyping_mode" argument="--genotyping-mode" type="select" optional="true" label="Genotyping Mode" help="Specifies how to determine the alternate alleles to use for genotyping">
                    <option selected="true" value="DISCOVERY">DISCOVERY</option>
                    <option selected="false" value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
                </param>
                <param name="germline_resource" argument="--germline-resource" type="data" optional="true" format="vcf,vcf_bgzip" label="Germline Resource" help="Population vcf of germline sequencing containing allele fractions."/>
                <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept"/>
                <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/>
                <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/>
                <param name="initial_tumor_lod" argument="--initial-tumor-lod" type="float" optional="true" value="2.0" label="Initial Tumor Lod" help="LOD threshold to consider pileup active."/>
                <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals">
                    <option selected="true" value="ALL">ALL</option>
                    <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
                </param>
                <param name="max_population_af" argument="--max-population-af" type="float" optional="true" value="0.01" label="Max Population Af" help="Maximum population allele frequency in tumor-only mode."/>
                <param name="max_reads_per_alignment_start" argument="--max-reads-per-alignment-start" type="integer" optional="true" value="50" label="Max Reads Per Alignment Start" help="Maximum number of reads to retain per alignment start position. Reads above this threshold will be downsampled. Set to 0 to disable."/>
                <param name="min_base_quality_score" argument="--min-base-quality-score" type="integer" optional="true" value="10" label="Min Base Quality Score" help="Minimum base quality required to consider a base for calling"/>
                <param name="native_pair_hmm_threads" argument="--native-pair-hmm-threads" type="integer" optional="true" value="4" label="Native Pair Hmm Threads" help="How many threads should a native pairHMM implementation use"/>
                <param name="native_pair_hmm_use_double_precision" argument="--native-pair-hmm-use-double-precision" type="boolean" truevalue="--native-pair-hmm-use-double-precision" falsevalue="" optional="true" checked="false" label="Native Pair Hmm Use Double Precision" help="use double precision in the native pairHmm. This is slower but matches the java implementation better"/>
                <param name="normal_lod" argument="--normal-lod" type="float" optional="true" value="2.2" label="Normal Lod" help="LOD threshold for calling normal variant non-germline."/>
                <param name="normal_sample" argument="--normal-sample" type="text" optional="true" value="" label="Normal Sample" help="BAM sample name of normal. May be URL-encoded as output by GetSampleName with -encode argument."/>
                <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/>
                <param name="output_mode" argument="--output-mode" type="select" optional="true" label="Output Mode" help="Specifies which type of calls we should output">
                    <option selected="true" value="EMIT_VARIANTS_ONLY">EMIT_VARIANTS_ONLY</option>
                    <option selected="false" value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
                    <option selected="false" value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
                </param>
                <param name="panel_of_normals" argument="--panel-of-normals" type="data" optional="true" format="vcf,vcf_bgzip" label="Panel Of Normals" help="VCF file of sites observed in normal."/>
                <param name="pedigree" argument="--pedigree" type="data" optional="true" format="" label="Pedigree" help="Pedigree file for determining the population &quot;founders&quot;"/>
                <param name="population_callset" argument="--population-callset" type="data" optional="true" format="vcf,vcf_bgzip" label="Population Callset" help="Callset to use in calculating genotype priors"/>
                <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/>
                <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don&apos;t emit genotype fields when writing vcf file output."/>
                <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/>
                <param name="tumor_lod_to_emit" argument="--tumor-lod-to-emit" type="float" optional="true" value="3.0" label="Tumor Lod To Emit" help="LOD threshold to emit tumor variant to VCF."/>
                <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/>
            </when>
            <when value="no" />
        </conditional>

        <!-- Assembly / genotyping tuning; read by the command template as $advanced.* when "yes" is selected. -->
        <conditional name="advanced">
            <param name="advanced_parameters" type="select" label="Advanced parameters">
                <option value="no">Use internal defaults</option>
                <option value="yes">Specify parameters</option>
            </param>
            <when value="yes">
                <param name="active_probability_threshold" argument="--active-probability-threshold" type="float" optional="true" value="0.002" label="Active Probability Threshold" help="Minimum probability for a locus to be considered active."/>
                <param name="all_site_pls" argument="--all-site-pls" type="boolean" truevalue="--all-site-pls" falsevalue="" optional="true" checked="false" label="All Site Pls" help="Annotate all sites with PLs"/>
                <param name="allow_non_unique_kmers_in_ref" argument="--allow-non-unique-kmers-in-ref" type="boolean" truevalue="--allow-non-unique-kmers-in-ref" falsevalue="" optional="true" checked="false" label="Allow Non Unique Kmers In Ref" help="Allow graphs that have non-unique kmers in the reference"/>
                <param name="assembly_region_padding" argument="--assembly-region-padding" type="integer" optional="true" value="100" label="Assembly Region Padding" help="Number of additional bases of context to include around each assembly region"/>
                <param name="bam_writer_type" argument="--bam-writer-type" type="select" optional="true" label="Bam Writer Type" help="Which haplotypes should be written to the BAM">
                    <option selected="false" value="ALL_POSSIBLE_HAPLOTYPES">ALL_POSSIBLE_HAPLOTYPES</option>
                    <option selected="true" value="CALLED_HAPLOTYPES">CALLED_HAPLOTYPES</option>
                </param>
                <param name="consensus" argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" optional="true" checked="false" label="Consensus" help="1000G consensus mode"/>
                <!-- Declared tabular because the help text specifies a tab-separated, header-less file. -->
                <param name="contamination_fraction_per_sample_file" argument="--contamination-fraction-per-sample-file" type="data" optional="true" format="tabular" label="Contamination Fraction Per Sample File" help="Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be &quot;&lt;SampleID&gt;&lt;TAB&gt;&lt;Contamination&gt;&quot; (Contamination is double) per line; No header."/>
                <param name="debug" argument="--debug" type="boolean" truevalue="--debug" falsevalue="" optional="true" checked="false" label="Debug" help="Print out very verbose debug information about each triggering active region"/>
                <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/>
                <param name="do_not_run_physical_phasing" argument="--do-not-run-physical-phasing" type="boolean" truevalue="--do-not-run-physical-phasing" falsevalue="" optional="true" checked="false" label="Do Not Run Physical Phasing" help="Disable physical phasing"/>
                <param name="dont_increase_kmer_sizes_for_cycles" argument="--dont-increase-kmer-sizes-for-cycles" type="boolean" truevalue="--dont-increase-kmer-sizes-for-cycles" falsevalue="" optional="true" checked="false" label="Dont Increase Kmer Sizes For Cycles" help="Disable iterating over kmer sizes when graph cycles are detected"/>
                <param name="dont_trim_active_regions" argument="--dont-trim-active-regions" type="boolean" truevalue="--dont-trim-active-regions" falsevalue="" optional="true" checked="false" label="Dont Trim Active Regions" help="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping"/>
                <param name="dont_use_soft_clipped_bases" argument="--dont-use-soft-clipped-bases" type="boolean" truevalue="--dont-use-soft-clipped-bases" falsevalue="" optional="true" checked="false" label="Dont Use Soft Clipped Bases" help="Do not analyze soft clipped bases in the reads"/>
                <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/>
                <param name="genotype_filtered_alleles" argument="--genotype-filtered-alleles" type="boolean" truevalue="--genotype-filtered-alleles" falsevalue="" optional="true" checked="false" label="Genotype Filtered Alleles" help="Whether to genotype all given alleles, even filtered ones, --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/>
                <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/>
                <param name="kmer_size" argument="--kmer-size" type="integer" optional="true" value="" label="Kmer Size" help="Kmer size to use in the read threading assembler"/>
                <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/>
                <param name="max_assembly_region_size" argument="--max-assembly-region-size" type="integer" optional="true" value="300" label="Max Assembly Region Size" help="Maximum size of an assembly region"/>
                <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/>
                <param name="max_mnp_distance" argument="--max-mnp-distance" type="integer" optional="true" value="1" label="Max Mnp Distance" help="Two or more phased substitutions separated by this distance or less are merged into MNPs."/>
                <param name="max_num_haplotypes_in_population" argument="--max-num-haplotypes-in-population" type="integer" optional="true" value="128" label="Max Num Haplotypes In Population" help="Maximum number of haplotypes to consider for your population"/>
                <param name="max_prob_propagation_distance" argument="--max-prob-propagation-distance" type="integer" optional="true" value="50" label="Max Prob Propagation Distance" help="Upper limit on how many bases away probability mass can be moved around when calculating the boundaries between active and inactive assembly regions"/>
                <param name="max_suspicious_reads_per_alignment_start" argument="--max-suspicious-reads-per-alignment-start" type="integer" optional="true" value="0" label="Max Suspicious Reads Per Alignment Start" help="Maximum number of suspicious reads (mediocre mapping quality or too many substitutions) allowed in a downsampling stride. Set to 0 to disable."/>
                <param name="min_assembly_region_size" argument="--min-assembly-region-size" type="integer" optional="true" value="50" label="Min Assembly Region Size" help="Minimum size of an assembly region"/>
                <param name="min_dangling_branch_length" argument="--min-dangling-branch-length" type="integer" optional="true" value="4" label="Min Dangling Branch Length" help="Minimum length of a dangling branch to attempt recovery"/>
                <param name="min_pruning" argument="--min-pruning" type="integer" optional="true" value="2" label="Min Pruning" help="Minimum support to not prune paths in the graph"/>
                <param name="num_pruning_samples" argument="--num-pruning-samples" type="integer" optional="true" value="1" label="Num Pruning Samples" help="Number of samples that must pass the minPruning threshold"/>
                <param name="pair_hmm_gap_continuation_penalty" argument="--pair-hmm-gap-continuation-penalty" type="integer" optional="true" value="10" label="Pair Hmm Gap Continuation Penalty" help="Flat gap continuation penalty for use in the Pair HMM"/>
                <param name="pair_hmm_implementation" argument="--pair-hmm-implementation" type="select" optional="true" label="Pair Hmm Implementation" help="The PairHMM implementation to use for genotype likelihood calculations">
                    <option selected="false" value="EXACT">EXACT</option>
                    <option selected="false" value="ORIGINAL">ORIGINAL</option>
                    <option selected="false" value="LOGLESS_CACHING">LOGLESS_CACHING</option>
                    <option selected="false" value="AVX_LOGLESS_CACHING">AVX_LOGLESS_CACHING</option>
                    <option selected="false" value="AVX_LOGLESS_CACHING_OMP">AVX_LOGLESS_CACHING_OMP</option>
                    <option selected="false" value="EXPERIMENTAL_FPGA_LOGLESS_CACHING">EXPERIMENTAL_FPGA_LOGLESS_CACHING</option>
                    <option selected="true" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option>
                </param>
                <param name="pcr_indel_model" argument="--pcr-indel-model" type="select" optional="true" label="Pcr Indel Model" help="The PCR indel model to use">
                    <option selected="false" value="NONE">NONE</option>
                    <option selected="false" value="HOSTILE">HOSTILE</option>
                    <option selected="false" value="AGGRESSIVE">AGGRESSIVE</option>
                    <option selected="true" value="CONSERVATIVE">CONSERVATIVE</option>
                </param>
                <param name="phred_scaled_global_read_mismapping_rate" argument="--phred-scaled-global-read-mismapping-rate" type="integer" optional="true" value="45" label="Phred Scaled Global Read Mismapping Rate" help="The global assumed mismapping rate for reads"/>
                <param name="smith_waterman" argument="--smith-waterman" type="select" optional="true" label="Smith Waterman" help="Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is the right choice">
                    <option selected="false" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option>
                    <option selected="false" value="AVX_ENABLED">AVX_ENABLED</option>
                    <option selected="true" value="JAVA">JAVA</option>
                </param>
                <param name="use_filtered_reads_for_annotations" argument="--use-filtered-reads-for-annotations" type="boolean" truevalue="--use-filtered-reads-for-annotations" falsevalue="" optional="true" checked="false" label="Use Filtered Reads For Annotations" help="Use the contamination-filtered read maps for the purposes of annotating variants"/>
            </when>
            <when value="no" />
        </conditional>

        <!-- Selection of additional output files; the checkboxes follow in the "yes" case. -->
        <conditional name="output">
            <param name="output_parameters" type="select" label="Output parameters">
                <option value="no">Use internal defaults</option>
                <option value="yes">Specify parameters</option>
            </param>
<when value="yes">
    <!-- Check boxes that switch the optional debug/diagnostic outputs on; each one gates the
         same-named dataset (without the _sel suffix) declared in the outputs section. -->
    <param name="activity_profile_out_sel" argument="--activity_profile_out_sel" type="boolean" truevalue="--activity_profile_out_sel" falsevalue="" optional="true" checked="false" label="Activity Profile Out" help="Output the raw activity profile results in IGV format"/>
    <param name="assembly_region_out_sel" argument="--assembly_region_out_sel" type="boolean" truevalue="--assembly_region_out_sel" falsevalue="" optional="true" checked="false" label="Assembly Region Out" help="Output the assembly region to this IGV formatted file"/>
    <param name="bam_output_sel" argument="--bam_output_sel" type="boolean" truevalue="--bam_output_sel" falsevalue="" optional="true" checked="false" label="Bam Output" help="File to which assembled haplotypes should be written"/>
    <param name="graph_output_sel" argument="--graph_output_sel" type="boolean" truevalue="--graph_output_sel" falsevalue="" optional="true" checked="false" label="Graph Output" help="Write debug assembly graph information to this file"/>
</when>
<when value="no" />
</conditional>
</inputs>
<outputs>
    <expand macro="gzip_vcf_output_params"/>
    <!-- Optional datasets. Each filter must name the conditional as declared in the inputs
         ("output", selector "output_parameters"). The selector is tested first because the
         *_sel keys are only defined in the "yes" branch; the "no" branch is empty.
         NOTE(review): assumes boolean params reach the filter as Python booleans - confirm. -->
    <data format="tabular" name="activity_profile_out" label="${tool.name} on ${on_string}: activity_profile_out tabular">
        <filter>output['output_parameters'] == 'yes' and output['activity_profile_out_sel']</filter>
    </data>
    <data format="tabular" name="assembly_region_out" label="${tool.name} on ${on_string}: assembly_region_out tabular">
        <filter>output['output_parameters'] == 'yes' and output['assembly_region_out_sel']</filter>
    </data>
    <data format="bam" name="bam_output" label="${tool.name} on ${on_string}: bam_output bam">
        <filter>output['output_parameters'] == 'yes' and output['bam_output_sel']</filter>
    </data>
    <data format="txt" name="graph_output" label="${tool.name} on ${on_string}: graph_output txt">
        <filter>output['output_parameters'] == 'yes' and output['graph_output_sel']</filter>
    </data>
</outputs>
<tests>
    <test />
</tests>
<help><![CDATA[Call somatic short variants via local assembly of haplotypes. Short variants include single nucleotide (SNV) and insertion and deletion (indel) variants. 
The caller combines the DREAM challenge-winning somatic genotyping engine of the original MuTect (`Cibulskis et al., 2013 <http://www.nature.com/nbt/journal/v31/n3/full/nbt.2514.html>`__) with the assembly-based machinery of `HaplotypeCaller <https://www.broadinstitute.org/gatk/documentation/tooldocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php>`__. This tool is featured in the *Somatic Short Mutation calling Best Practice Workflow*. See `Tutorial#11136 <https://software.broadinstitute.org/gatk/documentation/article?id=11136>`__ for a step-by-step description of the workflow and `Article#11127 <https://software.broadinstitute.org/gatk/documentation/article?id=11127>`__ for an overview of what traditional somatic calling entails. For the latest pipeline scripts, see the `Mutect2 WDL scripts directory <https://github.com/broadinstitute/gatk/tree/master/scripts/mutect2_wdl>`__. Although we present the tool for somatic calling, it may apply to other contexts, such as mitochondrial variant calling. Usage examples ~~~~~~~~~~~~~~ Example commands show how to run Mutect2 for typical scenarios. The two modes are (i) *somatic mode* where a tumor sample is matched with a normal sample in analysis and (ii) *tumor-only mode* where a single sample's alignment data undergoes analysis. (i) Tumor with matched normal ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Given a matched normal, Mutect2 is designed to call somatic variants only. The tool includes logic to skip emitting variants that are clearly present in the germline based on provided evidence, e.g. in the matched normal. This is done at an early stage to avoid spending computational resources on germline events. If the variant's germline status is borderline, then Mutect2 will emit the variant to the callset for subsequent filtering and review. 
:: gatk Mutect2 \ -R reference.fa \ -I tumor.bam \ -tumor tumor_sample_name \ -I normal.bam \ -normal normal_sample_name \ --germline-resource af-only-gnomad.vcf.gz \ --af-of-alleles-not-in-resource 0.00003125 \ --panel-of-normals pon.vcf.gz \ -O somatic.vcf.gz The --af-of-alleles-not-in-resource argument value should match expectations for alleles not found in the provided germline resource. Note the tool does not require a germline resource nor a panel of normals (PoN) to run. The tool prefilters sites for the matched normal and the PoN. For the germline resource, the tool prefilters on the allele. Below is an excerpt of a known variants resource with population allele frequencies :: #CHROM POS ID REF ALT QUAL FILTER INFO 1 10067 . T TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 30.35 PASS AC=3;AF=7.384E-5 1 10108 . CAACCCT C 46514.32 PASS AC=6;AF=1.525E-4 1 10109 . AACCCTAACCCT AAACCCT,* 89837.27 PASS AC=48,5;AF=0.001223,1.273E-4 1 10114 . TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA *,CAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA,T 36728.97 PASS AC=55,9,1;AF=0.001373,2.246E-4,2.496E-5 1 10119 . CT C,* 251.23 PASS AC=5,1;AF=1.249E-4,2.498E-5 1 10120 . TA CA,* 14928.74 PASS AC=10,6;AF=2.5E-4,1.5E-4 1 10128 . ACCCTAACCCTAACCCTAAC A,* 285.71 PASS AC=3,1;AF=7.58E-5,2.527E-5 1 10131 . CT C,* 378.93 PASS AC=7,5;AF=1.765E-4,1.261E-4 1 10132 . TAACCC *,T 18025.11 PASS AC=12,2;AF=3.03E-4,5.049E-5 (ii) Tumor-only mode ^^^^^^^^^^^^^^^^^^^^ This mode runs on a single sample, e.g. single tumor or single normal sample. To create a PoN, call on each normal sample in this mode, then use CreateSomaticPanelOfNormals to generate the PoN. 
:: gatk Mutect2 \ -R reference.fa \ -I sample.bam \ -tumor sample_name \ -O single_sample.vcf.gz Further points of interest ~~~~~~~~~~~~~~~~~~~~~~~~~~ Additional parameters that factor towards filtering, including normal-artifact-lod (default threshold 0.0) and tumor-lod (default threshold 5.3), are available in FilterMutectCalls. While the tool calculates normal-lod assuming a diploid genotype, it calculates normal-artifact-lod with the same approach it uses for tumor-lod, i.e. with a variable ploidy assumption. - If the normal artifact log odds becomes large, then FilterMutectCalls applies the artifact-in-normal filter. For matched normal samples with tumor contamination, consider increasing the normal-artifact-lod threshold. - The tumor log odds, which is calculated independently of any matched normal, determines whether to filter a tumor variant. Variants with tumor LODs exceeding the threshold pass filtering. If a variant is absent from a given germline resource, then the value for --af-of-alleles-not-in-resource applies. For example, gnomAD's 16,000 samples (~32,000 homologs per locus) becomes a probability of one in 32,000 or less. Thus, an allele's absence from the germline resource becomes evidence that it is not a germline variant. Caveats ~~~~~~~ Although GATK4 Mutect2 accommodates varying coverage depths, further optimization of parameters may improve calling for extremely high depths, e.g. 1000X. ]]></help> <citations> <expand macro="citations"/> </citations> </tool>