Mercurial > repos > dave > gatk4_mutect2
diff gatk4_Mutect2.xml @ 0:c27f4eb641bf draft
Uploaded
| author | dave |
|---|---|
| date | Thu, 26 Sep 2019 09:23:23 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gatk4_Mutect2.xml Thu Sep 26 09:23:23 2019 -0400
@@ -0,0 +1,551 @@
+<tool id="gatk4_mutect2" name="GATK4 Mutect2" version="@WRAPPER_VERSION@0" profile="18.05">
+    <description>- Call somatic SNVs and indels via local assembly of haplotypes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_cmd"/>
+    <!--
+        Command template (Cheetah). It stages the reference and any optional VCF
+        resources as shell steps, then builds a single Mutect2 invocation from the
+        common, optional, advanced and output parameter groups defined in the inputs.
+    -->
+    <command detect_errors="exit_code">
+        <![CDATA[
+        ## Preamble snippets come from macros.xml; the reference is linked and indexed locally.
+        #include source=$set_sections#
+        #include source=$pre_gatk_excl_ints_chth#
+        #include source=$bam_index_pre_chth#
+        #include source=$pre_gatk_ints_chth#
+        ln -s '$reference_source.reference_sequence' reference.fa &&
+        samtools faidx reference.fa &&
+        gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&
+
+        ## Stage the optional VCF resources as separate shell steps that finish BEFORE the
+        ## Mutect2 invocation starts; emitting them later would splice "ln -s" into the
+        ## middle of the GATK argument list. Each entry is (GATK flag, local file stem, dataset).
+        ## bgzip-compressed VCFs are indexed with tabix; paths are single-quoted for the shell.
+        #if str($optional.optional_parameters) == 'yes'
+            #set $vcf_resources = [('--panel-of-normals', 'panel_of_normals', $optional.panel_of_normals), ('--alleles', 'alleles', $optional.alleles), ('--germline-resource', 'germline_resource', $optional.germline_resource), ('--population-callset', 'population_callset', $optional.population_callset)]
+            #for $flag, $stem, $resource in $vcf_resources
+                #if $resource
+                    #if $resource.is_of_type("vcf_bgzip")
+                        ln -s '$resource' ${stem}.vcf.gz &&
+                        tabix ${stem}.vcf.gz &&
+                    #else
+                        ln -s '$resource' ${stem}.vcf &&
+                    #end if
+                #end if
+            #end for
+        #end if
+
+        @CMD_BEGIN@ Mutect2
+
+        #include source=$gatk_bam_input#
+
+        ## COMMON PARAMETERS ##
+        ## Free-text and file parameters default to empty, so they are only passed when set.
+
+        #if str($common.common_parameters) == 'yes'
+
+            #if str($common.read_filter).strip()
+                --read-filter="$common.read_filter"
+            #end if
+            --seconds-between-progress-updates="$common.seconds_between_progress_updates"
+            --verbosity="$common.verbosity"
+            --read-validation-stringency="$common.read_validation_stringency"
+            --interval-set-rule="$common.interval_set_rule"
+            #if $common.gatk_config_file
+                --gatk-config-file="$common.gatk_config_file"
+            #end if
+            #if str($common.disable_read_filter).strip()
+                --disable-read-filter="$common.disable_read_filter"
+            #end if
+            $common.QUIET
+            $common.use_jdk_deflater
+            $common.use_jdk_inflater
+            $common.lenient
+            $common.disable_tool_default_read_filters
+            $common.add_output_sam_program_record
+            $common.add_output_vcf_command_line
+
+        #end if
+
+        ## END COMMON PARAMETERS ##
+
+        ## OPTIONAL PARAMETERS ##
+
+        #if str($optional.optional_parameters) == 'yes'
+
+            ## Point GATK at the local links created during staging above.
+            #for $flag, $stem, $resource in $vcf_resources
+                #if $resource
+                    #if $resource.is_of_type("vcf_bgzip")
+                        $flag ${stem}.vcf.gz
+                    #else
+                        $flag ${stem}.vcf
+                    #end if
+                #end if
+            #end for
+
+            #if $optional.pedigree
+                --pedigree="$optional.pedigree"
+            #end if
+
+            #if $optional.arguments_file
+                --arguments_file="$optional.arguments_file"
+            #end if
+
+            --base-quality-score-threshold="$optional.base_quality_score_threshold"
+            --cloud-index-prefetch-buffer="$optional.cloud_index_prefetch_buffer"
+            --cloud-prefetch-buffer="$optional.cloud_prefetch_buffer"
+            #if str($optional.annotation).strip()
+                --annotation="$optional.annotation"
+            #end if
+            #if str($optional.annotation_group).strip()
+                --annotation-group="$optional.annotation_group"
+            #end if
+            #if str($optional.annotations_to_exclude).strip()
+                --annotations-to-exclude="$optional.annotations_to_exclude"
+            #end if
+            --af-of-alleles-not-in-resource="$optional.af_of_alleles_not_in_resource"
+            --contamination-fraction-to-filter="$optional.contamination_fraction_to_filter"
+            --downsampling-stride="$optional.downsampling_stride"
+            #if str($optional.founder_id).strip()
+                --founder-id="$optional.founder_id"
+            #end if
+            --gcs-max-retries="$optional.gcs_max_retries"
+            --genotyping-mode="$optional.genotyping_mode"
+            --heterozygosity="$optional.heterozygosity"
+            --heterozygosity-stdev="$optional.heterozygosity_stdev"
+            --indel-heterozygosity="$optional.indel_heterozygosity"
+            --initial-tumor-lod="$optional.initial_tumor_lod"
+            --interval-merging-rule="$optional.interval_merging_rule"
+            --max-population-af="$optional.max_population_af"
+            --max-reads-per-alignment-start="$optional.max_reads_per_alignment_start"
+            --min-base-quality-score="$optional.min_base_quality_score"
+            --native-pair-hmm-threads="$optional.native_pair_hmm_threads"
+            --normal-lod="$optional.normal_lod"
+            #if str($optional.normal_sample).strip()
+                --normal-sample="$optional.normal_sample"
+            #end if
+            --num-reference-samples-if-no-call="$optional.num_reference_samples_if_no_call"
+            --output-mode="$optional.output_mode"
+            --sample-ploidy="$optional.sample_ploidy"
+            --standard-min-confidence-threshold-for-calling="$optional.standard_min_confidence_threshold_for_calling"
+            --tumor-lod-to-emit="$optional.tumor_lod_to_emit"
+            $optional.annotate_with_num_discovered_alleles
+            $optional.disable_bam_index_caching
+            $optional.disable_sequence_dictionary_validation
+            $optional.genotype_germline_sites
+            $optional.genotype_pon_sites
+            $optional.native_pair_hmm_use_double_precision
+            $optional.sites_only_vcf_output
+            $optional.use_new_qual_calculator
+        #end if
+
+        ## END OPTIONAL PARAMETERS ##
+
+        ## ADVANCED PARAMETERS ##
+
+        #if str($advanced.advanced_parameters) == 'yes'
+            --active-probability-threshold="$advanced.active_probability_threshold"
+            --assembly-region-padding="$advanced.assembly_region_padding"
+            --bam-writer-type="$advanced.bam_writer_type"
+            #if $advanced.contamination_fraction_per_sample_file
+                --contamination-fraction-per-sample-file="$advanced.contamination_fraction_per_sample_file"
+            #end if
+            #if str($advanced.input_prior).strip()
+                --input-prior="$advanced.input_prior"
+            #end if
+            ## kmer_size has no default in the form, so it is empty unless the user fills it in.
+            #if str($advanced.kmer_size) not in ('', 'None')
+                --kmer-size="$advanced.kmer_size"
+            #end if
+            --max-alternate-alleles="$advanced.max_alternate_alleles"
+            --max-assembly-region-size="$advanced.max_assembly_region_size"
+            --max-genotype-count="$advanced.max_genotype_count"
+            --max-mnp-distance="$advanced.max_mnp_distance"
+            --max-num-haplotypes-in-population="$advanced.max_num_haplotypes_in_population"
+            --max-prob-propagation-distance="$advanced.max_prob_propagation_distance"
+            --max-suspicious-reads-per-alignment-start="$advanced.max_suspicious_reads_per_alignment_start"
+            --min-assembly-region-size="$advanced.min_assembly_region_size"
+            --min-dangling-branch-length="$advanced.min_dangling_branch_length"
+            --min-pruning="$advanced.min_pruning"
+            --num-pruning-samples="$advanced.num_pruning_samples"
+            --pair-hmm-gap-continuation-penalty="$advanced.pair_hmm_gap_continuation_penalty"
+            --pair-hmm-implementation="$advanced.pair_hmm_implementation"
+            --pcr-indel-model="$advanced.pcr_indel_model"
+            --phred-scaled-global-read-mismapping-rate="$advanced.phred_scaled_global_read_mismapping_rate"
+            --smith-waterman="$advanced.smith_waterman"
+            $advanced.all_site_pls
+            $advanced.allow_non_unique_kmers_in_ref
+            $advanced.consensus
+            $advanced.debug
+            $advanced.disable_tool_default_annotations
+            $advanced.do_not_run_physical_phasing
+            $advanced.dont_increase_kmer_sizes_for_cycles
+            $advanced.dont_trim_active_regions
+            $advanced.dont_use_soft_clipped_bases
+            $advanced.enable_all_annotations
+            $advanced.genotype_filtered_alleles
+            $advanced.use_filtered_reads_for_annotations
+
+        #end if
+
+        ## END ADVANCED PARAMETERS ##
+
+        ## ADDITIONAL OUTPUT PARAMETERS ##
+        ## Controlled by the "output" conditional: each extra file is requested only when its
+        ## own selector box is checked, independent of the advanced parameter group.
+
+        #if str($output.output_parameters) == 'yes'
+
+            #if $output.activity_profile_out_sel
+                --activity-profile-out="$activity_profile_out"
+            #end if
+            #if $output.assembly_region_out_sel
+                --assembly-region-out="$assembly_region_out"
+            #end if
+            #if $output.bam_output_sel
+                --bam-output="$bam_output"
+            #end if
+            #if $output.graph_output_sel
+                --graph-output="$graph_output"
+            #end if
+
+        #end if
+
+        #include source=$gatk_excl_ints_chth#
+        #include source=$gatk_ints_chth#
+        #include source=$vcf_output_opts#
+        #include source=$ref_opts#
+        #include source=$gatk_seqdict#
+        #if $tumor_sample
+            --tumor-sample="$tumor_sample"
+        #end if
+        ]]>
+    </command>
+    <inputs>
+        <expand macro="gatk_bam_req_params"/>
+        <expand macro="gzip_vcf_params"/>
+        <expand macro="ref_sel"/>
+        <param name="tumor_sample" argument="--tumor-sample" type="text" optional="false" value="" label="Tumor Sample" help="BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument."/>
+        <!-- Common GATK engine arguments; only rendered into the command when "Specify parameters" is chosen. -->
+        <conditional name="common">
+            <param name="common_parameters" type="select" label="Common parameters">
+                <option value="no">Use internal defaults</option>
+                <option value="yes">Specify parameters</option>
+            </param>
+            <when value="yes">
+                <expand macro="gatk_excl_ints"/>
+                <expand macro="seq_dict_sel"/>
+                <!-- These two boxes are checked by default; unchecking passes an explicit "false", because omitting the flag would leave the GATK default in place. -->
+                <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="--add-output-sam-program-record false" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/>
+                <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="--add-output-vcf-command-line false" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/>
+                <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/>
+                <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters"
type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/>
+                <!-- NOTE(review): format was empty; "txt" is assumed for the GATK configuration file. Confirm against the datatypes installed on the target Galaxy. -->
+                <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="txt" label="Gatk Config File" help="A configuration file to use with the GATK."/>
+                <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs">
+                    <option selected="true" value="UNION">UNION</option>
+                    <option selected="false" value="INTERSECTION">INTERSECTION</option>
+                </param>
+                <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/>
+                <param name="QUIET" argument="--QUIET" type="boolean" truevalue="--QUIET" falsevalue="" optional="true" checked="false" label="Quiet" help="Whether to suppress job-summary info on System.err."/>
+                <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/>
+                <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.">
+                    <option selected="false" value="STRICT">STRICT</option>
+                    <option selected="false" value="LENIENT">LENIENT</option>
+                    <option selected="true" value="SILENT">SILENT</option>
+                </param>
+                <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/>
+                <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/>
+                <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/>
+                <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging.">
+                    <option selected="false" value="ERROR">ERROR</option>
+                    <option selected="false" value="WARNING">WARNING</option>
+                    <option selected="true" value="INFO">INFO</option>
+                    <option selected="false" value="DEBUG">DEBUG</option>
+                </param>
+            </when>
+            <when value="no" />
+        </conditional>
+        <!--
+            Optional Mutect2 arguments. The four VCF resources (alleles, germline resource,
+            panel of normals, population callset) accept plain or bgzip-compressed VCF,
+            which are the two cases the command template distinguishes.
+        -->
+        <conditional name="optional">
+            <param name="optional_parameters" type="select" label="Optional parameters">
+                <option value="no">Use internal defaults</option>
+                <option value="yes">Specify parameters</option>
+            </param>
+            <when value="yes">
+                <expand macro="gatk_ints"/>
+                <param name="af_of_alleles_not_in_resource" argument="--af-of-alleles-not-in-resource" type="float" optional="true" value="-1.0" label="Af Of Alleles Not In Resource" help="Population allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for a derivation of the default value."/>
+                <param name="alleles" argument="--alleles" type="data" optional="true" format="vcf,vcf_bgzip" label="Alleles" help="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/>
+                <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/>
+                <param name="annotation" argument="--annotation" type="text" optional="true" value="" label="Annotation" help="One or more specific annotations to add to variant calls"/>
+                <param name="annotation_group" argument="--annotation-group" type="text" optional="true" value="" label="Annotation Group" help="One or more groups of annotations to apply to variant calls"/>
+                <param name="annotations_to_exclude" argument="--annotations-to-exclude" type="text" optional="true" value="" label="Annotations To Exclude" help="One or more specific annotations to exclude from variant calls"/>
+                <!-- NOTE(review): format was empty; "txt" is assumed for a plain-text arguments file. -->
+                <param name="arguments_file" argument="--arguments_file" type="data" optional="true" format="txt" label="Arguments_File" help="read one or more arguments files and add them to the command line"/>
+                <param name="base_quality_score_threshold" argument="--base-quality-score-threshold" type="integer" optional="true" value="18" label="Base Quality Score Threshold" help="Base qualities below this threshold will be reduced to the minimum (6)"/>
+                <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset."/>
+                <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/>
+                <param name="contamination_fraction_to_filter" argument="--contamination-fraction-to-filter" type="float" optional="true" value="0.0" label="Contamination Fraction To Filter" help="Fraction of contamination in sequencing data (for all samples) to aggressively remove"/>
+                <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don&apos;t cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified."/>
+                <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!"/>
+                <param name="downsampling_stride" argument="--downsampling-stride" type="integer" optional="true" value="1" label="Downsampling Stride" help="Downsample a pool of reads starting within a range of one or more bases."/>
+                <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &quot;founders&quot;"/>
+                <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/>
+                <param name="genotype_germline_sites" argument="--genotype-germline-sites" type="boolean" truevalue="--genotype-germline-sites" falsevalue="" optional="true" checked="false" label="Genotype Germline Sites" help="(EXPERIMENTAL) Call all apparent germline site even though they will ultimately be filtered."/>
+                <param name="genotype_pon_sites" argument="--genotype-pon-sites" type="boolean" truevalue="--genotype-pon-sites" falsevalue="" optional="true" checked="false" label="Genotype Pon Sites" help="Call sites in the PoN even though they will ultimately be filtered."/>
+                <param name="genotyping_mode" argument="--genotyping-mode" type="select" optional="true" label="Genotyping Mode" help="Specifies how to determine the alternate alleles to use for genotyping">
+                    <option selected="true" value="DISCOVERY">DISCOVERY</option>
+                    <option selected="false" value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
+                </param>
+                <param name="germline_resource" argument="--germline-resource" type="data" optional="true" format="vcf,vcf_bgzip" label="Germline Resource" help="Population vcf of germline sequencing containing allele fractions."/>
+                <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept"/>
+                <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/>
+                <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/>
+                <param name="initial_tumor_lod" argument="--initial-tumor-lod" type="float" optional="true" value="2.0" label="Initial Tumor Lod" help="LOD threshold to consider pileup active."/>
+                <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals">
+                    <option selected="true" value="ALL">ALL</option>
+                    <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
+                </param>
+                <param name="max_population_af" argument="--max-population-af" type="float" optional="true" value="0.01" label="Max Population Af" help="Maximum population allele frequency in tumor-only mode."/>
+                <param name="max_reads_per_alignment_start" argument="--max-reads-per-alignment-start" type="integer" optional="true" value="50" label="Max Reads Per Alignment Start" help="Maximum number of reads to retain per alignment start position. Reads above this threshold will be downsampled. Set to 0 to disable."/>
+                <param name="min_base_quality_score" argument="--min-base-quality-score" type="integer" optional="true" value="10" label="Min Base Quality Score" help="Minimum base quality required to consider a base for calling"/>
+                <param name="native_pair_hmm_threads" argument="--native-pair-hmm-threads" type="integer" optional="true" value="4" label="Native Pair Hmm Threads" help="How many threads should a native pairHMM implementation use"/>
+                <param name="native_pair_hmm_use_double_precision" argument="--native-pair-hmm-use-double-precision" type="boolean" truevalue="--native-pair-hmm-use-double-precision" falsevalue="" optional="true" checked="false" label="Native Pair Hmm Use Double Precision" help="use double precision in the native pairHmm. This is slower but matches the java implementation better"/>
+                <param name="normal_lod" argument="--normal-lod" type="float" optional="true" value="2.2" label="Normal Lod" help="LOD threshold for calling normal variant non-germline."/>
+                <param name="normal_sample" argument="--normal-sample" type="text" optional="true" value="" label="Normal Sample" help="BAM sample name of normal. May be URL-encoded as output by GetSampleName with -encode argument."/>
+                <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/>
+                <param name="output_mode" argument="--output-mode" type="select" optional="true" label="Output Mode" help="Specifies which type of calls we should output">
+                    <option selected="true" value="EMIT_VARIANTS_ONLY">EMIT_VARIANTS_ONLY</option>
+                    <option selected="false" value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
+                    <option selected="false" value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
+                </param>
+                <param name="panel_of_normals" argument="--panel-of-normals" type="data" optional="true" format="vcf,vcf_bgzip" label="Panel Of Normals" help="VCF file of sites observed in normal."/>
+                <!-- NOTE(review): format was empty; "txt" is assumed for the pedigree file. -->
+                <param name="pedigree" argument="--pedigree" type="data" optional="true" format="txt" label="Pedigree" help="Pedigree file for determining the population &quot;founders&quot;"/>
+                <param name="population_callset" argument="--population-callset" type="data" optional="true" format="vcf,vcf_bgzip" label="Population Callset" help="Callset to use in calculating genotype priors"/>
+                <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/>
+                <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don&apos;t emit genotype fields when writing vcf file output."/>
+                <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/>
+                <param name="tumor_lod_to_emit" argument="--tumor-lod-to-emit" type="float" optional="true" value="3.0" label="Tumor Lod To Emit" help="LOD threshold to emit tumor variant to VCF."/>
+                <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/>
+            </when>
+            <when value="no" />
+        </conditional>
+        <!-- Advanced assembly, pair-HMM and genotyping arguments. -->
+        <conditional name="advanced">
+            <param name="advanced_parameters" type="select" label="Advanced parameters">
+                <option value="no">Use internal defaults</option>
+                <option value="yes">Specify parameters</option>
+            </param>
+            <when value="yes">
+                <param name="active_probability_threshold" argument="--active-probability-threshold" type="float" optional="true" value="0.002" label="Active Probability Threshold" help="Minimum probability for a locus to be considered active."/>
+                <param name="all_site_pls" argument="--all-site-pls" type="boolean" truevalue="--all-site-pls" falsevalue="" optional="true" checked="false" label="All Site Pls" help="Annotate all sites with PLs"/>
+                <param name="allow_non_unique_kmers_in_ref" argument="--allow-non-unique-kmers-in-ref" type="boolean" truevalue="--allow-non-unique-kmers-in-ref" falsevalue="" optional="true" checked="false" label="Allow Non Unique Kmers In Ref" help="Allow graphs that have non-unique kmers in the reference"/>
+                <param name="assembly_region_padding" argument="--assembly-region-padding" type="integer" optional="true" value="100" label="Assembly Region Padding" help="Number of additional bases of context to include around each assembly region"/>
+                <param name="bam_writer_type" argument="--bam-writer-type" type="select" optional="true" label="Bam Writer Type" help="Which haplotypes should be written to the BAM">
+                    <option selected="false" value="ALL_POSSIBLE_HAPLOTYPES">ALL_POSSIBLE_HAPLOTYPES</option>
+                    <option selected="true" value="CALLED_HAPLOTYPES">CALLED_HAPLOTYPES</option>
+                </param>
+                <param name="consensus" argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" optional="true" checked="false" label="Consensus" help="1000G consensus mode"/>
+                <!-- The help text describes a tab-separated file, hence the tabular format. -->
+                <param name="contamination_fraction_per_sample_file" argument="--contamination-fraction-per-sample-file" type="data" optional="true" format="tabular" label="Contamination Fraction Per Sample File" help="Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be &quot;&lt;SampleID&gt;&lt;TAB&gt;&lt;Contamination&gt;&quot; (Contamination is double) per line; No header."/>
+                <param name="debug" argument="--debug" type="boolean" truevalue="--debug" falsevalue="" optional="true" checked="false" label="Debug" help="Print out very verbose debug information about each triggering active region"/>
+                <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/>
+                <param name="do_not_run_physical_phasing" argument="--do-not-run-physical-phasing" type="boolean" truevalue="--do-not-run-physical-phasing" falsevalue="" optional="true" checked="false" label="Do Not Run Physical Phasing" help="Disable physical phasing"/>
+                <param name="dont_increase_kmer_sizes_for_cycles" argument="--dont-increase-kmer-sizes-for-cycles" type="boolean" truevalue="--dont-increase-kmer-sizes-for-cycles" falsevalue="" optional="true" checked="false" label="Dont Increase Kmer Sizes For Cycles" help="Disable iterating over kmer sizes when graph cycles are detected"/>
+                <param name="dont_trim_active_regions" argument="--dont-trim-active-regions" type="boolean" truevalue="--dont-trim-active-regions" falsevalue="" optional="true" checked="false" label="Dont Trim Active Regions" help="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping"/>
+                <param name="dont_use_soft_clipped_bases" argument="--dont-use-soft-clipped-bases" type="boolean" truevalue="--dont-use-soft-clipped-bases" falsevalue="" optional="true" checked="false" label="Dont Use Soft Clipped Bases" help="Do not analyze soft clipped bases in the reads"/>
+                <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/>
+                <param name="genotype_filtered_alleles" argument="--genotype-filtered-alleles" type="boolean" truevalue="--genotype-filtered-alleles" falsevalue="" optional="true" checked="false" label="Genotype Filtered Alleles" help="Whether to genotype all given alleles, even filtered ones, --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/>
+                <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/>
+                <param name="kmer_size" argument="--kmer-size" type="integer" optional="true" value="" label="Kmer Size" help="Kmer size to use in the read threading assembler"/>
+                <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/>
+                <param name="max_assembly_region_size" argument="--max-assembly-region-size" type="integer" optional="true" value="300" label="Max Assembly Region Size" help="Maximum size of an assembly region"/>
+                <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/>
+                <param name="max_mnp_distance" argument="--max-mnp-distance" type="integer" optional="true" value="1" label="Max Mnp Distance" help="Two or more phased substitutions separated by this distance or less are merged into MNPs."/>
+                <param name="max_num_haplotypes_in_population" argument="--max-num-haplotypes-in-population" type="integer" optional="true" value="128" label="Max Num Haplotypes In Population" help="Maximum number of haplotypes to consider for your population"/>
+                <param name="max_prob_propagation_distance" argument="--max-prob-propagation-distance" type="integer" optional="true" value="50" label="Max Prob Propagation Distance" help="Upper limit on how many bases away probability mass can be moved around when calculating the boundaries between active and inactive assembly regions"/>
+                <param name="max_suspicious_reads_per_alignment_start" argument="--max-suspicious-reads-per-alignment-start" type="integer" optional="true" value="0" label="Max Suspicious Reads Per Alignment Start" help="Maximum number of suspicious reads (mediocre mapping quality or too many substitutions) allowed in a downsampling stride. Set to 0 to disable."/>
+                <param name="min_assembly_region_size" argument="--min-assembly-region-size" type="integer" optional="true" value="50" label="Min Assembly Region Size" help="Minimum size of an assembly region"/>
+                <param name="min_dangling_branch_length" argument="--min-dangling-branch-length" type="integer" optional="true" value="4" label="Min Dangling Branch Length" help="Minimum length of a dangling branch to attempt recovery"/>
+                <param name="min_pruning" argument="--min-pruning" type="integer" optional="true" value="2" label="Min Pruning" help="Minimum support to not prune paths in the graph"/>
+                <param name="num_pruning_samples" argument="--num-pruning-samples" type="integer" optional="true" value="1" label="Num Pruning Samples" help="Number of samples that must pass the minPruning threshold"/>
+                <param name="pair_hmm_gap_continuation_penalty" argument="--pair-hmm-gap-continuation-penalty" type="integer" optional="true" value="10" label="Pair Hmm Gap Continuation Penalty" help="Flat gap continuation penalty for use in the Pair HMM"/>
+                <param name="pair_hmm_implementation" argument="--pair-hmm-implementation" type="select" optional="true" label="Pair Hmm Implementation" help="The PairHMM implementation to use for genotype likelihood calculations">
+                    <option selected="false" value="EXACT">EXACT</option>
+                    <option selected="false" value="ORIGINAL">ORIGINAL</option>
+                    <option selected="false" value="LOGLESS_CACHING">LOGLESS_CACHING</option>
+                    <option selected="false" value="AVX_LOGLESS_CACHING">AVX_LOGLESS_CACHING</option>
+                    <option selected="false" value="AVX_LOGLESS_CACHING_OMP">AVX_LOGLESS_CACHING_OMP</option>
+                    <option selected="false" value="EXPERIMENTAL_FPGA_LOGLESS_CACHING">EXPERIMENTAL_FPGA_LOGLESS_CACHING</option>
+                    <option selected="true" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option>
+                </param>
+                <param name="pcr_indel_model" argument="--pcr-indel-model" type="select" optional="true" label="Pcr Indel Model" help="The PCR indel model to use">
+                    <option selected="false" value="NONE">NONE</option>
+                    <option selected="false" value="HOSTILE">HOSTILE</option>
+                    <option selected="false" value="AGGRESSIVE">AGGRESSIVE</option>
+                    <option selected="true" value="CONSERVATIVE">CONSERVATIVE</option>
+                </param>
+                <param name="phred_scaled_global_read_mismapping_rate" argument="--phred-scaled-global-read-mismapping-rate" type="integer" optional="true" value="45" label="Phred Scaled Global Read Mismapping Rate" help="The global assumed mismapping rate for reads"/>
+                <param name="smith_waterman" argument="--smith-waterman" type="select" optional="true" label="Smith Waterman" help="Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is the right choice">
+                    <option selected="false" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option>
+                    <option selected="false" value="AVX_ENABLED">AVX_ENABLED</option>
+                    <option selected="true" value="JAVA">JAVA</option>
+                </param>
+                <param name="use_filtered_reads_for_annotations" argument="--use-filtered-reads-for-annotations" type="boolean" truevalue="--use-filtered-reads-for-annotations" falsevalue="" optional="true" checked="false" label="Use Filtered Reads For Annotations" help="Use the contamination-filtered read maps for the purposes of annotating variants"/>
+            </when>
+            <when value="no" />
+        </conditional>
+        <!-- Selectors for the additional output files. -->
+        <conditional name="output">
+            <param name="output_parameters" type="select" label="Output parameters">
+                <option value="no">Use internal
defaults</option>
+                <option value="yes">Specify parameters</option>
+            </param>
+            <when value="yes">
+                <!-- Each *_sel switch opts in to one optional output dataset declared under <outputs>. -->
+                <!-- NOTE(review): the truevalue strings of these switches are not GATK flags; presumably the command template only tests them for truthiness. Confirm before placing them on the command line. -->
+                <param name="activity_profile_out_sel" argument="--activity_profile_out_sel" type="boolean" truevalue="--activity_profile_out_sel" falsevalue="" optional="true" checked="false" label="Activity Profile Out" help="Output the raw activity profile results in IGV format"/>
+                <param name="assembly_region_out_sel" argument="--assembly_region_out_sel" type="boolean" truevalue="--assembly_region_out_sel" falsevalue="" optional="true" checked="false" label="Assembly Region Out" help="Output the assembly region to this IGV formatted file"/>
+                <param name="bam_output_sel" argument="--bam_output_sel" type="boolean" truevalue="--bam_output_sel" falsevalue="" optional="true" checked="false" label="Bam Output" help="File to which assembled haplotypes should be written"/>
+                <param name="graph_output_sel" argument="--graph_output_sel" type="boolean" truevalue="--graph_output_sel" falsevalue="" optional="true" checked="false" label="Graph Output" help="Write debug assembly graph information to this file"/>
+            </when>
+            <when value="no" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <expand macro="gzip_vcf_output_params"/>
+        <!-- Optional outputs. Each filter is a Python expression over the tool parameters;
+             "output" is the conditional declared in <inputs>. The *_sel switches exist only
+             in its "yes" branch, so output_parameters is tested first and the switch is
+             looked up only when that branch is active. -->
+        <data format="tabular" name="activity_profile_out" label="${tool.name} on ${on_string}: activity_profile_out tabular">
+            <filter>output['output_parameters'] == 'yes' and output['activity_profile_out_sel']</filter>
+        </data>
+        <data format="tabular" name="assembly_region_out" label="${tool.name} on ${on_string}: assembly_region_out tabular">
+            <filter>output['output_parameters'] == 'yes' and output['assembly_region_out_sel']</filter>
+        </data>
+        <data format="bam" name="bam_output" label="${tool.name} on ${on_string}: bam_output bam">
+            <filter>output['output_parameters'] == 'yes' and output['bam_output_sel']</filter>
+        </data>
+        <data format="txt" name="graph_output" label="${tool.name} on ${on_string}: graph_output txt">
+            <filter>output['output_parameters'] == 'yes' and output['graph_output_sel']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test />
+    </tests>
+    <help><![CDATA[Call somatic short variants via
local assembly of haplotypes. Short +variants include single nucleotide (SNV) and insertion and deletion +(indel) variants. The caller combines the DREAM challenge-winning +somatic genotyping engine of the original MuTect (`Cibulskis et al., +2013 <http://www.nature.com/nbt/journal/v31/n3/full/nbt.2514.html>`__) +with the assembly-based machinery of +`HaplotypeCaller <https://www.broadinstitute.org/gatk/documentation/tooldocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php>`__. + +This tool is featured in the *Somatic Short Mutation calling Best +Practice Workflow*. See +`Tutorial#11136 <https://software.broadinstitute.org/gatk/documentation/article?id=11136>`__ +for a step-by-step description of the workflow and +`Article#11127 <https://software.broadinstitute.org/gatk/documentation/article?id=11127>`__ +for an overview of what traditional somatic calling entails. For the +latest pipeline scripts, see the `Mutect2 WDL scripts +directory <https://github.com/broadinstitute/gatk/tree/master/scripts/mutect2_wdl>`__. +Although we present the tool for somatic calling, it may apply to other +contexts, such as mitochondrial variant calling. + +Usage examples +~~~~~~~~~~~~~~ + +Example commands show how to run Mutect2 for typical scenarios. The two +modes are (i) *somatic mode* where a tumor sample is matched with a +normal sample in analysis and (ii) *tumor-only mode* where a single +sample's alignment data undergoes analysis. + +(i) Tumor with matched normal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Given a matched normal, Mutect2 is designed to call somatic variants +only. The tool includes logic to skip emitting variants that are clearly +present in the germline based on provided evidence, e.g. in the matched +normal. This is done at an early stage to avoid spending computational +resources on germline events. If the variant's germline status is +borderline, then Mutect2 will emit the variant to the callset for +subsequent filtering and review. 
+ +:: + + gatk Mutect2 \ + -R reference.fa \ + -I tumor.bam \ + -tumor tumor_sample_name \ + -I normal.bam \ + -normal normal_sample_name \ + --germline-resource af-only-gnomad.vcf.gz \ + --af-of-alleles-not-in-resource 0.00003125 \ + --panel-of-normals pon.vcf.gz \ + -O somatic.vcf.gz + + +The --af-of-alleles-not-in-resource argument value should match +expectations for alleles not found in the provided germline resource. +Note the tool does not require a germline resource nor a panel of +normals (PoN) to run. The tool prefilters sites for the matched normal +and the PoN. For the germline resource, the tool prefilters on the +allele. Below is an excerpt of a known variants resource with population +allele frequencies + +:: + + #CHROM POS ID REF ALT QUAL FILTER INFO + 1 10067 . T TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 30.35 PASS AC=3;AF=7.384E-5 + 1 10108 . CAACCCT C 46514.32 PASS AC=6;AF=1.525E-4 + 1 10109 . AACCCTAACCCT AAACCCT,* 89837.27 PASS AC=48,5;AF=0.001223,1.273E-4 + 1 10114 . TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA *,CAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA,T 36728.97 PASS AC=55,9,1;AF=0.001373,2.246E-4,2.496E-5 + 1 10119 . CT C,* 251.23 PASS AC=5,1;AF=1.249E-4,2.498E-5 + 1 10120 . TA CA,* 14928.74 PASS AC=10,6;AF=2.5E-4,1.5E-4 + 1 10128 . ACCCTAACCCTAACCCTAAC A,* 285.71 PASS AC=3,1;AF=7.58E-5,2.527E-5 + 1 10131 . CT C,* 378.93 PASS AC=7,5;AF=1.765E-4,1.261E-4 + 1 10132 . TAACCC *,T 18025.11 PASS AC=12,2;AF=3.03E-4,5.049E-5 + + +(ii) Tumor-only mode +^^^^^^^^^^^^^^^^^^^^ + +This mode runs on a single sample, e.g. single tumor or single normal +sample. To create a PoN, call on each normal sample in this mode, then +use CreateSomaticPanelOfNormals to generate the PoN. 
+ +:: + + gatk Mutect2 \ + -R reference.fa \ + -I sample.bam \ + -tumor sample_name \ + -O single_sample.vcf.gz + + +Further points of interest +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Additional parameters that factor towards filtering, including +normal-artifact-lod (default threshold 0.0) and tumor-lod (default +threshold 5.3), are available in FilterMutectCalls. While the tool +calculates normal-lod assuming a diploid genotype, it calculates +normal-artifact-lod with the same approach it uses for tumor-lod, i.e. +with a variable ploidy assumption. + +- If the normal artifact log odds becomes large, then FilterMutectCalls applies the artifact-in-normal filter. For matched normal samples with tumor contamination, consider increasing the normal-artifact-lod threshold. + +- The tumor log odds, which is calculated independently of any matched normal, determines whether to filter a tumor variant. Variants with tumor LODs exceeding the threshold pass filtering. + + +If a variant is absent from a given germline resource, then the value +for --af-of-alleles-not-in-resource applies. For example, gnomAD's +16,000 samples (~32,000 homologs per locus) becomes a probability of one +in 32,000 or less. Thus, an allele's absence from the germline resource +becomes evidence that it is not a germline variant. + +Caveats +~~~~~~~ + +Although GATK4 Mutect2 accommodates varying coverage depths, further +optimization of parameters may improve calling for extreme high depths, +e.g. 1000X. +]]></help> + <citations> + <expand macro="citations"/> + </citations> +</tool>
