comparison gatk4_Mutect2.xml @ 0:c27f4eb641bf draft

Uploaded
author dave
date Thu, 26 Sep 2019 09:23:23 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c27f4eb641bf
1 <tool id="gatk4_mutect2" name="GATK4 Mutect2" version="@WRAPPER_VERSION@0" profile="18.05">
2 <description>- Call somatic SNVs and indels via local assembly of haplotypes</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_cmd"/>
8 <command detect_errors="exit_code">
9 <![CDATA[
10 #include source=$set_sections#
11 #include source=$pre_gatk_excl_ints_chth#
12 #include source=$bam_index_pre_chth#
13 #include source=$pre_gatk_ints_chth#
14 ln -s '$reference_source.reference_sequence' reference.fa &&
15 samtools faidx reference.fa &&
16 gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&
17
18 ## STAGE OPTIONAL VCF RESOURCES ##
19 ## Shell steps: they must run before the gatk call, not inside its argument list.
20 #if str($optional.optional_parameters) == 'yes'
21 #if $optional.population_callset
22 #if $optional.population_callset.is_of_type("vcf_bgzip")
23 ln -s '$optional.population_callset' population_callset.vcf.gz &&
24 tabix population_callset.vcf.gz &&
25 #else
26 ln -s '$optional.population_callset' population_callset.vcf &&
27 #end if
28 #end if
29 #if $optional.panel_of_normals
30 #if $optional.panel_of_normals.is_of_type("vcf_bgzip")
31 ln -s '$optional.panel_of_normals' panel_of_normals.vcf.gz &&
32 tabix panel_of_normals.vcf.gz &&
33 #else
34 ln -s '$optional.panel_of_normals' panel_of_normals.vcf &&
35 #end if
36 #end if
37 #if $optional.germline_resource
38 #if $optional.germline_resource.is_of_type("vcf_bgzip")
39 ln -s '$optional.germline_resource' germline_resource.vcf.gz &&
40 tabix germline_resource.vcf.gz &&
41 #else
42 ln -s '$optional.germline_resource' germline_resource.vcf &&
43 #end if
44 #end if
45 #if $optional.alleles
46 #if $optional.alleles.is_of_type("vcf_bgzip")
47 ln -s '$optional.alleles' alleles.vcf.gz &&
48 tabix alleles.vcf.gz &&
49 #else
50 ln -s '$optional.alleles' alleles.vcf &&
51 #end if
52 #end if
53 #end if
54
55 @CMD_BEGIN@ Mutect2
56 #include source=$gatk_bam_input#
57
58 ## COMMON PARAMETERS ##
59 ## Free-text and file arguments are only emitted when a value was supplied.
60 #if str($common.common_parameters) == 'yes'
61 #if $common.read_filter
62 --read-filter="$common.read_filter"
63 #end if
64 --seconds-between-progress-updates="$common.seconds_between_progress_updates"
65 --verbosity="$common.verbosity"
66 --read-validation-stringency="$common.read_validation_stringency"
67 --interval-set-rule="$common.interval_set_rule"
68 #if $common.gatk_config_file
69 --gatk-config-file="$common.gatk_config_file"
70 #end if
71 #if $common.disable_read_filter
72 --disable-read-filter="$common.disable_read_filter"
73 #end if
74 $common.QUIET
75 $common.use_jdk_deflater
76 $common.use_jdk_inflater
77 $common.lenient
78 $common.disable_tool_default_read_filters
79 $common.add_output_sam_program_record
80 $common.add_output_vcf_command_line
81 #end if
82 ## END COMMON PARAMETERS ##
83
84 ## OPTIONAL PARAMETERS ##
85 #if str($optional.optional_parameters) == 'yes'
86 #if $optional.panel_of_normals
87 #if $optional.panel_of_normals.is_of_type("vcf_bgzip")
88 --panel-of-normals panel_of_normals.vcf.gz
89 #else
90 --panel-of-normals panel_of_normals.vcf
91 #end if
92 #end if
93 #if $optional.pedigree
94 --pedigree="$optional.pedigree"
95 #end if
96 #if $optional.alleles
97 #if $optional.alleles.is_of_type("vcf_bgzip")
98 --alleles alleles.vcf.gz
99 #else
100 --alleles alleles.vcf
101 #end if
102 #end if
103 #if $optional.germline_resource
104 #if $optional.germline_resource.is_of_type("vcf_bgzip")
105 --germline-resource germline_resource.vcf.gz
106 #else
107 --germline-resource germline_resource.vcf
108 #end if
109 #end if
110 #if $optional.population_callset
111 #if $optional.population_callset.is_of_type("vcf_bgzip")
112 --population-callset population_callset.vcf.gz
113 #else
114 --population-callset population_callset.vcf
115 #end if
116 #end if
117 #if $optional.arguments_file
118 --arguments_file="$optional.arguments_file"
119 #end if
120 --base-quality-score-threshold="$optional.base_quality_score_threshold"
121 --cloud-index-prefetch-buffer="$optional.cloud_index_prefetch_buffer"
122 --cloud-prefetch-buffer="$optional.cloud_prefetch_buffer"
123 #if $optional.annotation
124 --annotation="$optional.annotation"
125 #end if
126 #if $optional.annotation_group
127 --annotation-group="$optional.annotation_group"
128 #end if
129 #if $optional.annotations_to_exclude
130 --annotations-to-exclude="$optional.annotations_to_exclude"
131 #end if
132 --af-of-alleles-not-in-resource="$optional.af_of_alleles_not_in_resource"
133 --contamination-fraction-to-filter="$optional.contamination_fraction_to_filter"
134 --downsampling-stride="$optional.downsampling_stride"
135 #if $optional.founder_id
136 --founder-id="$optional.founder_id"
137 #end if
138 --gcs-max-retries="$optional.gcs_max_retries"
139 --genotyping-mode="$optional.genotyping_mode"
140 --heterozygosity="$optional.heterozygosity"
141 --heterozygosity-stdev="$optional.heterozygosity_stdev"
142 --indel-heterozygosity="$optional.indel_heterozygosity"
143 --initial-tumor-lod="$optional.initial_tumor_lod"
144 --interval-merging-rule="$optional.interval_merging_rule"
145 --max-population-af="$optional.max_population_af"
146 --max-reads-per-alignment-start="$optional.max_reads_per_alignment_start"
147 --min-base-quality-score="$optional.min_base_quality_score"
148 --native-pair-hmm-threads="$optional.native_pair_hmm_threads"
149 --normal-lod="$optional.normal_lod"
150 #if $optional.normal_sample
151 --normal-sample="$optional.normal_sample"
152 #end if
153 --num-reference-samples-if-no-call="$optional.num_reference_samples_if_no_call"
154 --output-mode="$optional.output_mode"
155 --sample-ploidy="$optional.sample_ploidy"
156 --standard-min-confidence-threshold-for-calling="$optional.standard_min_confidence_threshold_for_calling"
157 --tumor-lod-to-emit="$optional.tumor_lod_to_emit"
158 $optional.annotate_with_num_discovered_alleles
159 $optional.disable_bam_index_caching
160 $optional.disable_sequence_dictionary_validation
161 $optional.genotype_germline_sites
162 $optional.genotype_pon_sites
163 $optional.native_pair_hmm_use_double_precision
164 $optional.sites_only_vcf_output
165 $optional.use_new_qual_calculator
166 #end if
167 ## END OPTIONAL PARAMETERS ##
168
169 ## ADVANCED PARAMETERS ##
170 #if str($advanced.advanced_parameters) == 'yes'
171 --active-probability-threshold="$advanced.active_probability_threshold"
172 --assembly-region-padding="$advanced.assembly_region_padding"
173 --bam-writer-type="$advanced.bam_writer_type"
174 #if $advanced.contamination_fraction_per_sample_file
175 --contamination-fraction-per-sample-file="$advanced.contamination_fraction_per_sample_file"
176 #end if
177 #if $advanced.input_prior
178 --input-prior="$advanced.input_prior"
179 #end if
180 #if str($advanced.kmer_size) != ''
181 --kmer-size="$advanced.kmer_size"
182 #end if
183 --max-alternate-alleles="$advanced.max_alternate_alleles"
184 --max-assembly-region-size="$advanced.max_assembly_region_size"
185 --max-genotype-count="$advanced.max_genotype_count"
186 --max-mnp-distance="$advanced.max_mnp_distance"
187 --max-num-haplotypes-in-population="$advanced.max_num_haplotypes_in_population"
188 --max-prob-propagation-distance="$advanced.max_prob_propagation_distance"
189 --max-suspicious-reads-per-alignment-start="$advanced.max_suspicious_reads_per_alignment_start"
190 --min-assembly-region-size="$advanced.min_assembly_region_size"
191 --min-dangling-branch-length="$advanced.min_dangling_branch_length"
192 --min-pruning="$advanced.min_pruning"
193 --num-pruning-samples="$advanced.num_pruning_samples"
194 --pair-hmm-gap-continuation-penalty="$advanced.pair_hmm_gap_continuation_penalty"
195 --pair-hmm-implementation="$advanced.pair_hmm_implementation"
196 --pcr-indel-model="$advanced.pcr_indel_model"
197 --phred-scaled-global-read-mismapping-rate="$advanced.phred_scaled_global_read_mismapping_rate"
198 --smith-waterman="$advanced.smith_waterman"
199 $advanced.all_site_pls
200 $advanced.allow_non_unique_kmers_in_ref
201 $advanced.consensus
202 $advanced.debug
203 $advanced.disable_tool_default_annotations
204 $advanced.do_not_run_physical_phasing
205 $advanced.dont_increase_kmer_sizes_for_cycles
206 $advanced.dont_trim_active_regions
207 $advanced.dont_use_soft_clipped_bases
208 $advanced.enable_all_annotations
209 $advanced.genotype_filtered_alleles
210 $advanced.use_filtered_reads_for_annotations
211 #end if
212 ## END ADVANCED PARAMETERS ##
213
214 ## ADDITIONAL OUTPUT PARAMETERS ##
215 #if str($advanced.advanced_parameters) == 'yes'
216 --activity-profile-out="$activity_profile_out"
217 --assembly-region-out="$assembly_region_out"
218 --bam-output="$bam_output"
219 --graph-output="$graph_output"
220 #end if
221 #include source=$gatk_excl_ints_chth#
222 #include source=$gatk_ints_chth#
223 #include source=$vcf_output_opts#
224 #include source=$ref_opts#
225 #include source=$gatk_seqdict#
226 #if $tumor_sample
227 --tumor-sample="$tumor_sample"
228 #end if
229 ]]>
230 </command>
231 <inputs>
232 <expand macro="gatk_bam_req_params"/> <!-- Macro body is not visible in this file; presumably supplied by the imported macros.xml (confirm). -->
233 <expand macro="gzip_vcf_params"/>
234 <expand macro="ref_sel"/>
235 <param name="tumor_sample" argument="--tumor-sample" type="text" optional="false" value="" label="Tumor Sample" help="BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument."/> <!-- The command template only emits the tumor sample argument when this value is non-empty. -->
236 <conditional name="common"> <!-- Common parameters: the "yes" branch declares the inputs read by the COMMON PARAMETERS section of the command template; the "no" branch declares none. -->
237 <param name="common_parameters" type="select" label="Common parameters">
238 <option value="no">Use internal defaults</option>
239 <option value="yes">Specify parameters</option>
240 </param>
241 <when value="yes">
242 <expand macro="gatk_excl_ints"/> <!-- Macro bodies are not visible in this file; presumably supplied by the imported macros.xml (confirm). -->
243 <expand macro="seq_dict_sel"/>
244 <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/>
245 <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/>
246 <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/>
247 <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters" type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/>
248 <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="" label="Gatk Config File" help="A configuration file to use with the GATK."/> <!-- NOTE(review): format is empty; the intended datatype is not evident from this file. -->
249 <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs">
250 <option selected="true" value="UNION">UNION</option>
251 <option selected="false" value="INTERSECTION">INTERSECTION</option>
252 </param>
253 <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/>
254 <param name="QUIET" argument="--QUIET" type="boolean" truevalue="--QUIET" falsevalue="" optional="true" checked="false" label="Quiet" help="Whether to suppress job-summary info on System.err."/>
255 <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/>
256 <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded.">
257 <option selected="false" value="STRICT">STRICT</option>
258 <option selected="false" value="LENIENT">LENIENT</option>
259 <option selected="true" value="SILENT">SILENT</option>
260 </param>
261 <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/>
262 <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/>
263 <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/>
264 <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging.">
265 <option selected="false" value="ERROR">ERROR</option>
266 <option selected="false" value="WARNING">WARNING</option>
267 <option selected="true" value="INFO">INFO</option>
268 <option selected="false" value="DEBUG">DEBUG</option>
269 </param>
270 </when>
271 <when value="no" />
272 </conditional>
273 <conditional name="optional"> <!-- Optional parameters: the "yes" branch declares the inputs read by the OPTIONAL PARAMETERS section of the command template. The four VCF resources accept vcf and vcf_bgzip because the template branches on is_of_type("vcf_bgzip"). -->
274 <param name="optional_parameters" type="select" label="Optional parameters">
275 <option value="no">Use internal defaults</option>
276 <option value="yes">Specify parameters</option>
277 </param>
278 <when value="yes">
279 <expand macro="gatk_ints"/>
280 <param name="af_of_alleles_not_in_resource" argument="--af-of-alleles-not-in-resource" type="float" optional="true" value="-1.0" label="Af Of Alleles Not In Resource" help="Population allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for a derivation of the default value."/>
281 <param name="alleles" argument="--alleles" type="data" optional="true" format="vcf,vcf_bgzip" label="Alleles" help="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/>
282 <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/>
283 <param name="annotation" argument="--annotation" type="text" optional="true" value="" label="Annotation" help="One or more specific annotations to add to variant calls"/>
284 <param name="annotation_group" argument="--annotation-group" type="text" optional="true" value="" label="Annotation Group" help="One or more groups of annotations to apply to variant calls"/>
285 <param name="annotations_to_exclude" argument="--annotations-to-exclude" type="text" optional="true" value="" label="Annotations To Exclude" help="One or more specific annotations to exclude from variant calls"/>
286 <param name="arguments_file" argument="--arguments_file" type="data" optional="true" format="" label="Arguments_File" help="read one or more arguments files and add them to the command line"/>
287 <param name="base_quality_score_threshold" argument="--base-quality-score-threshold" type="integer" optional="true" value="18" label="Base Quality Score Threshold" help="Base qualities below this threshold will be reduced to the minimum (6)"/>
288 <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset."/>
289 <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/>
290 <param name="contamination_fraction_to_filter" argument="--contamination-fraction-to-filter" type="float" optional="true" value="0.0" label="Contamination Fraction To Filter" help="Fraction of contamination in sequencing data (for all samples) to aggressively remove"/>
291 <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don&amp;apos;t cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified."/>
292 <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!"/>
293 <param name="downsampling_stride" argument="--downsampling-stride" type="integer" optional="true" value="1" label="Downsampling Stride" help="Downsample a pool of reads starting within a range of one or more bases."/>
294 <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &amp;quot;founders&amp;quot;"/>
295 <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/>
296 <param name="genotype_germline_sites" argument="--genotype-germline-sites" type="boolean" truevalue="--genotype-germline-sites" falsevalue="" optional="true" checked="false" label="Genotype Germline Sites" help="(EXPERIMENTAL) Call all apparent germline site even though they will ultimately be filtered."/>
297 <param name="genotype_pon_sites" argument="--genotype-pon-sites" type="boolean" truevalue="--genotype-pon-sites" falsevalue="" optional="true" checked="false" label="Genotype Pon Sites" help="Call sites in the PoN even though they will ultimately be filtered."/>
298 <param name="genotyping_mode" argument="--genotyping-mode" type="select" optional="true" label="Genotyping Mode" help="Specifies how to determine the alternate alleles to use for genotyping">
299 <option selected="true" value="DISCOVERY">DISCOVERY</option>
300 <option selected="false" value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
301 </param>
302 <param name="germline_resource" argument="--germline-resource" type="data" optional="true" format="vcf,vcf_bgzip" label="Germline Resource" help="Population vcf of germline sequencing containing allele fractions."/>
303 <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept"/>
304 <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/>
305 <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/>
306 <param name="initial_tumor_lod" argument="--initial-tumor-lod" type="float" optional="true" value="2.0" label="Initial Tumor Lod" help="LOD threshold to consider pileup active."/>
307 <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals">
308 <option selected="true" value="ALL">ALL</option>
309 <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option>
310 </param>
311 <param name="max_population_af" argument="--max-population-af" type="float" optional="true" value="0.01" label="Max Population Af" help="Maximum population allele frequency in tumor-only mode."/>
312 <param name="max_reads_per_alignment_start" argument="--max-reads-per-alignment-start" type="integer" optional="true" value="50" label="Max Reads Per Alignment Start" help="Maximum number of reads to retain per alignment start position. Reads above this threshold will be downsampled. Set to 0 to disable."/>
313 <param name="min_base_quality_score" argument="--min-base-quality-score" type="integer" optional="true" value="10" label="Min Base Quality Score" help="Minimum base quality required to consider a base for calling"/>
314 <param name="native_pair_hmm_threads" argument="--native-pair-hmm-threads" type="integer" optional="true" value="4" label="Native Pair Hmm Threads" help="How many threads should a native pairHMM implementation use"/>
315 <param name="native_pair_hmm_use_double_precision" argument="--native-pair-hmm-use-double-precision" type="boolean" truevalue="--native-pair-hmm-use-double-precision" falsevalue="" optional="true" checked="false" label="Native Pair Hmm Use Double Precision" help="use double precision in the native pairHmm. This is slower but matches the java implementation better"/>
316 <param name="normal_lod" argument="--normal-lod" type="float" optional="true" value="2.2" label="Normal Lod" help="LOD threshold for calling normal variant non-germline."/>
317 <param name="normal_sample" argument="--normal-sample" type="text" optional="true" value="" label="Normal Sample" help="BAM sample name of normal. May be URL-encoded as output by GetSampleName with -encode argument."/>
318 <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/>
319 <param name="output_mode" argument="--output-mode" type="select" optional="true" label="Output Mode" help="Specifies which type of calls we should output">
320 <option selected="true" value="EMIT_VARIANTS_ONLY">EMIT_VARIANTS_ONLY</option>
321 <option selected="false" value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option>
322 <option selected="false" value="EMIT_ALL_SITES">EMIT_ALL_SITES</option>
323 </param>
324 <param name="panel_of_normals" argument="--panel-of-normals" type="data" optional="true" format="vcf,vcf_bgzip" label="Panel Of Normals" help="VCF file of sites observed in normal."/>
325 <param name="pedigree" argument="--pedigree" type="data" optional="true" format="" label="Pedigree" help="Pedigree file for determining the population &amp;quot;founders&amp;quot;"/>
326 <param name="population_callset" argument="--population-callset" type="data" optional="true" format="vcf,vcf_bgzip" label="Population Callset" help="Callset to use in calculating genotype priors"/>
327 <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/>
328 <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don&amp;apos;t emit genotype fields when writing vcf file output."/>
329 <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/>
330 <param name="tumor_lod_to_emit" argument="--tumor-lod-to-emit" type="float" optional="true" value="3.0" label="Tumor Lod To Emit" help="LOD threshold to emit tumor variant to VCF."/>
331 <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/>
332 </when>
333 <when value="no" />
334 </conditional>
335 <conditional name="advanced">
336 <param name="advanced_parameters" type="select" label="Advanced parameters">
337 <option value="no">Use internal defaults</option>
338 <option value="yes">Specify parameters</option>
339 </param>
340 <when value="yes">
341 <param name="active_probability_threshold" argument="--active-probability-threshold" type="float" optional="true" value="0.002" label="Active Probability Threshold" help="Minimum probability for a locus to be considered active."/>
342 <param name="all_site_pls" argument="--all-site-pls" type="boolean" truevalue="--all-site-pls" falsevalue="" optional="true" checked="false" label="All Site Pls" help="Annotate all sites with PLs"/>
343 <param name="allow_non_unique_kmers_in_ref" argument="--allow-non-unique-kmers-in-ref" type="boolean" truevalue="--allow-non-unique-kmers-in-ref" falsevalue="" optional="true" checked="false" label="Allow Non Unique Kmers In Ref" help="Allow graphs that have non-unique kmers in the reference"/>
344 <param name="assembly_region_padding" argument="--assembly-region-padding" type="integer" optional="true" value="100" label="Assembly Region Padding" help="Number of additional bases of context to include around each assembly region"/>
345 <param name="bam_writer_type" argument="--bam-writer-type" type="select" optional="true" label="Bam Writer Type" help="Which haplotypes should be written to the BAM">
346 <option selected="false" value="ALL_POSSIBLE_HAPLOTYPES">ALL_POSSIBLE_HAPLOTYPES</option>
347 <option selected="true" value="CALLED_HAPLOTYPES">CALLED_HAPLOTYPES</option>
348 </param>
349 <param name="consensus" argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" optional="true" checked="false" label="Consensus" help="1000G consensus mode"/>
350 <param name="contamination_fraction_per_sample_file" argument="--contamination-fraction-per-sample-file" type="data" optional="true" format="" label="Contamination Fraction Per Sample File" help="Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be &amp;quot;&amp;lt;SampleID&amp;gt;&amp;lt;TAB&amp;gt;&amp;lt;Contamination&amp;gt;&amp;quot; (Contamination is double) per line; No header."/>
351 <param name="debug" argument="--debug" type="boolean" truevalue="--debug" falsevalue="" optional="true" checked="false" label="Debug" help="Print out very verbose debug information about each triggering active region"/>
352 <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/>
353 <param name="do_not_run_physical_phasing" argument="--do-not-run-physical-phasing" type="boolean" truevalue="--do-not-run-physical-phasing" falsevalue="" optional="true" checked="false" label="Do Not Run Physical Phasing" help="Disable physical phasing"/>
354 <param name="dont_increase_kmer_sizes_for_cycles" argument="--dont-increase-kmer-sizes-for-cycles" type="boolean" truevalue="--dont-increase-kmer-sizes-for-cycles" falsevalue="" optional="true" checked="false" label="Dont Increase Kmer Sizes For Cycles" help="Disable iterating over kmer sizes when graph cycles are detected"/>
355 <param name="dont_trim_active_regions" argument="--dont-trim-active-regions" type="boolean" truevalue="--dont-trim-active-regions" falsevalue="" optional="true" checked="false" label="Dont Trim Active Regions" help="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping"/>
356 <param name="dont_use_soft_clipped_bases" argument="--dont-use-soft-clipped-bases" type="boolean" truevalue="--dont-use-soft-clipped-bases" falsevalue="" optional="true" checked="false" label="Dont Use Soft Clipped Bases" help="Do not analyze soft clipped bases in the reads"/>
357 <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/>
358 <param name="genotype_filtered_alleles" argument="--genotype-filtered-alleles" type="boolean" truevalue="--genotype-filtered-alleles" falsevalue="" optional="true" checked="false" label="Genotype Filtered Alleles" help="Whether to genotype all given alleles, even filtered ones, --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/>
359 <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/>
360 <param name="kmer_size" argument="--kmer-size" type="integer" optional="true" value="" label="Kmer Size" help="Kmer size to use in the read threading assembler"/>
361 <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/>
362 <param name="max_assembly_region_size" argument="--max-assembly-region-size" type="integer" optional="true" value="300" label="Max Assembly Region Size" help="Maximum size of an assembly region"/>
363 <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/>
364 <param name="max_mnp_distance" argument="--max-mnp-distance" type="integer" optional="true" value="1" label="Max Mnp Distance" help="Two or more phased substitutions separated by this distance or less are merged into MNPs."/>
365 <param name="max_num_haplotypes_in_population" argument="--max-num-haplotypes-in-population" type="integer" optional="true" value="128" label="Max Num Haplotypes In Population" help="Maximum number of haplotypes to consider for your population"/>
366 <param name="max_prob_propagation_distance" argument="--max-prob-propagation-distance" type="integer" optional="true" value="50" label="Max Prob Propagation Distance" help="Upper limit on how many bases away probability mass can be moved around when calculating the boundaries between active and inactive assembly regions"/>
367 <param name="max_suspicious_reads_per_alignment_start" argument="--max-suspicious-reads-per-alignment-start" type="integer" optional="true" value="0" label="Max Suspicious Reads Per Alignment Start" help="Maximum number of suspicious reads (mediocre mapping quality or too many substitutions) allowed in a downsampling stride. Set to 0 to disable."/>
368 <param name="min_assembly_region_size" argument="--min-assembly-region-size" type="integer" optional="true" value="50" label="Min Assembly Region Size" help="Minimum size of an assembly region"/>
369 <param name="min_dangling_branch_length" argument="--min-dangling-branch-length" type="integer" optional="true" value="4" label="Min Dangling Branch Length" help="Minimum length of a dangling branch to attempt recovery"/>
370 <param name="min_pruning" argument="--min-pruning" type="integer" optional="true" value="2" label="Min Pruning" help="Minimum support to not prune paths in the graph"/>
371 <param name="num_pruning_samples" argument="--num-pruning-samples" type="integer" optional="true" value="1" label="Num Pruning Samples" help="Number of samples that must pass the minPruning threshold"/>
372 <param name="pair_hmm_gap_continuation_penalty" argument="--pair-hmm-gap-continuation-penalty" type="integer" optional="true" value="10" label="Pair Hmm Gap Continuation Penalty" help="Flat gap continuation penalty for use in the Pair HMM"/>
373 <param name="pair_hmm_implementation" argument="--pair-hmm-implementation" type="select" optional="true" label="Pair Hmm Implementation" help="The PairHMM implementation to use for genotype likelihood calculations">
374 <option selected="false" value="EXACT">EXACT</option>
375 <option selected="false" value="ORIGINAL">ORIGINAL</option>
376 <option selected="false" value="LOGLESS_CACHING">LOGLESS_CACHING</option>
377 <option selected="false" value="AVX_LOGLESS_CACHING">AVX_LOGLESS_CACHING</option>
378 <option selected="false" value="AVX_LOGLESS_CACHING_OMP">AVX_LOGLESS_CACHING_OMP</option>
379 <option selected="false" value="EXPERIMENTAL_FPGA_LOGLESS_CACHING">EXPERIMENTAL_FPGA_LOGLESS_CACHING</option>
380 <option selected="true" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option>
381 </param>
382 <param name="pcr_indel_model" argument="--pcr-indel-model" type="select" optional="true" label="Pcr Indel Model" help="The PCR indel model to use">
383 <option selected="false" value="NONE">NONE</option>
384 <option selected="false" value="HOSTILE">HOSTILE</option>
385 <option selected="false" value="AGGRESSIVE">AGGRESSIVE</option>
386 <option selected="true" value="CONSERVATIVE">CONSERVATIVE</option>
387 </param>
388 <param name="phred_scaled_global_read_mismapping_rate" argument="--phred-scaled-global-read-mismapping-rate" type="integer" optional="true" value="45" label="Phred Scaled Global Read Mismapping Rate" help="The global assumed mismapping rate for reads"/>
389 <param name="smith_waterman" argument="--smith-waterman" type="select" optional="true" label="Smith Waterman" help="Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is the right choice">
390 <option selected="false" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option>
391 <option selected="false" value="AVX_ENABLED">AVX_ENABLED</option>
392 <option selected="true" value="JAVA">JAVA</option>
393 </param>
394 <param name="use_filtered_reads_for_annotations" argument="--use-filtered-reads-for-annotations" type="boolean" truevalue="--use-filtered-reads-for-annotations" falsevalue="" optional="true" checked="false" label="Use Filtered Reads For Annotations" help="Use the contamination-filtered read maps for the purposes of annotating variants"/>
395 </when>
396 <when value="no" />
397 </conditional>
398 <conditional name="output"> <!-- Optional extra outputs. The four booleans below share their names with the keys read by the filter expressions in the outputs section. -->
399 <param name="output_parameters" type="select" label="Output parameters">
400 <option value="no">Use internal defaults</option>
401 <option value="yes">Specify parameters</option>
402 </param>
403 <when value="yes"> <!-- NOTE(review): the argument and truevalue strings below use underscores and a _sel suffix, unlike the hyphenated GATK options elsewhere in this file; confirm the command section does not emit them verbatim. -->
404 <param name="activity_profile_out_sel" argument="--activity_profile_out_sel" type="boolean" truevalue="--activity_profile_out_sel" falsevalue="" optional="true" checked="false" label="Activity Profile Out" help="Output the raw activity profile results in IGV format"/>
405 <param name="assembly_region_out_sel" argument="--assembly_region_out_sel" type="boolean" truevalue="--assembly_region_out_sel" falsevalue="" optional="true" checked="false" label="Assembly Region Out" help="Output the assembly region to this IGV formatted file"/>
406 <param name="bam_output_sel" argument="--bam_output_sel" type="boolean" truevalue="--bam_output_sel" falsevalue="" optional="true" checked="false" label="Bam Output" help="File to which assembled haplotypes should be written"/>
407 <param name="graph_output_sel" argument="--graph_output_sel" type="boolean" truevalue="--graph_output_sel" falsevalue="" optional="true" checked="false" label="Graph Output" help="Write debug assembly graph information to this file"/>
408 </when>
409 <when value="no" /> <!-- No parameters are defined in this branch, so none of the *_sel keys exist when "Use internal defaults" is chosen. -->
410 </conditional>
411 </inputs>
412 <outputs> <!-- The gzip_vcf_output_params macro (from macros.xml, not shown here) is expanded first; the four datasets after it are optional and gated by the "output" conditional. -->
413 <expand macro="gzip_vcf_output_params"/>
414 <data format="tabular" name="activity_profile_out" label="${tool.name} on ${on_string}: activity_profile_out tabular"> <!-- Each filter tests the selector first: the *_sel keys only exist in the "yes" branch of the conditional. -->
415 <filter>output['output_parameters'] == 'yes' and output['activity_profile_out_sel']</filter>
416 </data>
417 <data format="tabular" name="assembly_region_out" label="${tool.name} on ${on_string}: assembly_region_out tabular">
418 <filter>output['output_parameters'] == 'yes' and output['assembly_region_out_sel']</filter>
419 </data>
420 <data format="bam" name="bam_output" label="${tool.name} on ${on_string}: bam_output bam">
421 <filter>output['output_parameters'] == 'yes' and output['bam_output_sel']</filter>
422 </data>
423 <data format="txt" name="graph_output" label="${tool.name} on ${on_string}: graph_output txt">
424 <filter>output['output_parameters'] == 'yes' and output['graph_output_sel']</filter>
425 </data>
426 </outputs>
427 <tests>
428 <test /> <!-- NOTE(review): empty placeholder; it supplies no inputs and asserts no outputs, so it does not exercise the tool. Replace with a real test case. -->
429 </tests>
430 <help><![CDATA[Call somatic short variants via local assembly of haplotypes. Short
431 variants include single nucleotide (SNV) and insertion and deletion
432 (indel) variants. The caller combines the DREAM challenge-winning
433 somatic genotyping engine of the original MuTect (`Cibulskis et al.,
434 2013 <http://www.nature.com/nbt/journal/v31/n3/full/nbt.2514.html>`__)
435 with the assembly-based machinery of
436 `HaplotypeCaller <https://www.broadinstitute.org/gatk/documentation/tooldocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php>`__.
437
438 This tool is featured in the *Somatic Short Mutation calling Best
439 Practice Workflow*. See
440 `Tutorial#11136 <https://software.broadinstitute.org/gatk/documentation/article?id=11136>`__
441 for a step-by-step description of the workflow and
442 `Article#11127 <https://software.broadinstitute.org/gatk/documentation/article?id=11127>`__
443 for an overview of what traditional somatic calling entails. For the
444 latest pipeline scripts, see the `Mutect2 WDL scripts
445 directory <https://github.com/broadinstitute/gatk/tree/master/scripts/mutect2_wdl>`__.
446 Although we present the tool for somatic calling, it may apply to other
447 contexts, such as mitochondrial variant calling.
448
449 Usage examples
450 ~~~~~~~~~~~~~~
451
452 Example commands show how to run Mutect2 for typical scenarios. The two
453 modes are (i) *somatic mode* where a tumor sample is matched with a
454 normal sample in analysis and (ii) *tumor-only mode* where a single
455 sample's alignment data undergoes analysis.
456
457 (i) Tumor with matched normal
458 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
459
460 Given a matched normal, Mutect2 is designed to call somatic variants
461 only. The tool includes logic to skip emitting variants that are clearly
462 present in the germline based on provided evidence, e.g. in the matched
463 normal. This is done at an early stage to avoid spending computational
464 resources on germline events. If the variant's germline status is
465 borderline, then Mutect2 will emit the variant to the callset for
466 subsequent filtering and review.
467
468 ::
469
470 gatk Mutect2 \
471 -R reference.fa \
472 -I tumor.bam \
473 -tumor tumor_sample_name \
474 -I normal.bam \
475 -normal normal_sample_name \
476 --germline-resource af-only-gnomad.vcf.gz \
477 --af-of-alleles-not-in-resource 0.00003125 \
478 --panel-of-normals pon.vcf.gz \
479 -O somatic.vcf.gz
480
481
482 The --af-of-alleles-not-in-resource argument value should match
483 expectations for alleles not found in the provided germline resource.
484 Note the tool does not require a germline resource nor a panel of
485 normals (PoN) to run. The tool prefilters sites for the matched normal
486 and the PoN. For the germline resource, the tool prefilters on the
487 allele. Below is an excerpt of a known variants resource with population
488 allele frequencies:
489
490 ::
491
492 #CHROM POS ID REF ALT QUAL FILTER INFO
493 1 10067 . T TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 30.35 PASS AC=3;AF=7.384E-5
494 1 10108 . CAACCCT C 46514.32 PASS AC=6;AF=1.525E-4
495 1 10109 . AACCCTAACCCT AAACCCT,* 89837.27 PASS AC=48,5;AF=0.001223,1.273E-4
496 1 10114 . TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA *,CAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA,T 36728.97 PASS AC=55,9,1;AF=0.001373,2.246E-4,2.496E-5
497 1 10119 . CT C,* 251.23 PASS AC=5,1;AF=1.249E-4,2.498E-5
498 1 10120 . TA CA,* 14928.74 PASS AC=10,6;AF=2.5E-4,1.5E-4
499 1 10128 . ACCCTAACCCTAACCCTAAC A,* 285.71 PASS AC=3,1;AF=7.58E-5,2.527E-5
500 1 10131 . CT C,* 378.93 PASS AC=7,5;AF=1.765E-4,1.261E-4
501 1 10132 . TAACCC *,T 18025.11 PASS AC=12,2;AF=3.03E-4,5.049E-5
502
503
504 (ii) Tumor-only mode
505 ^^^^^^^^^^^^^^^^^^^^
506
507 This mode runs on a single sample, e.g. single tumor or single normal
508 sample. To create a PoN, call on each normal sample in this mode, then
509 use CreateSomaticPanelOfNormals to generate the PoN.
510
511 ::
512
513 gatk Mutect2 \
514 -R reference.fa \
515 -I sample.bam \
516 -tumor sample_name \
517 -O single_sample.vcf.gz
518
519
520 Further points of interest
521 ~~~~~~~~~~~~~~~~~~~~~~~~~~
522
523 Additional parameters that factor towards filtering, including
524 normal-artifact-lod (default threshold 0.0) and tumor-lod (default
525 threshold 5.3), are available in FilterMutectCalls. While the tool
526 calculates normal-lod assuming a diploid genotype, it calculates
527 normal-artifact-lod with the same approach it uses for tumor-lod, i.e.
528 with a variable ploidy assumption.
529
530 - If the normal artifact log odds becomes large, then FilterMutectCalls applies the artifact-in-normal filter. For matched normal samples with tumor contamination, consider increasing the normal-artifact-lod threshold.
531
532 - The tumor log odds, which is calculated independently of any matched normal, determines whether to filter a tumor variant. Variants with tumor LODs exceeding the threshold pass filtering.
533
534
535 If a variant is absent from a given germline resource, then the value
536 for --af-of-alleles-not-in-resource applies. For example, gnomAD's
537 16,000 samples (~32,000 homologs per locus) translate to a probability of one
538 in 32,000 or less. Thus, an allele's absence from the germline resource
539 becomes evidence that it is not a germline variant.
540
541 Caveats
542 ~~~~~~~
543
544 Although GATK4 Mutect2 accommodates varying coverage depths, further
545 optimization of parameters may improve calling for extreme high depths,
546 e.g. 1000X.
547 ]]></help>
548 <citations> <!-- Citation entries are supplied by expanding the "citations" macro. -->
549 <expand macro="citations"/>
550 </citations>
551 </tool>