Mercurial > repos > dave > gatk4_mutect2
comparison gatk4_Mutect2.xml @ 0:c27f4eb641bf draft
Uploaded
| author | dave |
|---|---|
| date | Thu, 26 Sep 2019 09:23:23 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c27f4eb641bf |
|---|---|
| 1 <tool id="gatk4_mutect2" name="GATK4 Mutect2" version="@WRAPPER_VERSION@0" profile="18.05"> | |
| 2 <description>- Call somatic SNVs and indels via local assembly of haplotypes</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <expand macro="version_cmd"/> | |
| 8 <command detect_errors="exit_code"> | |
| 9 <![CDATA[ | |
| 10 #include source=$set_sections# | |
| 11 #include source=$pre_gatk_excl_ints_chth# | |
| 12 #include source=$bam_index_pre_chth# | |
| 13 #include source=$pre_gatk_ints_chth# | |
| 14 ln -s '$reference_source.reference_sequence' reference.fa && | |
| 15 samtools faidx reference.fa && | |
| 16 gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" && | |
| 17 ## STAGE OPTIONAL VCF RESOURCES: these shell steps must finish before Mutect2 starts, so they cannot sit among its arguments ## | |
| 18 #if str($optional.optional_parameters) == 'yes' | |
| 19 #if $optional.population_callset | |
| 20 #if $optional.population_callset.is_of_type("vcf_bgzip") | |
| 21 ln -s '$optional.population_callset' population_callset.vcf.gz && | |
| 22 tabix population_callset.vcf.gz && | |
| 23 #else | |
| 24 ln -s '$optional.population_callset' population_callset.vcf && | |
| 25 #end if | |
| 26 #end if | |
| 27 | |
| 28 #if $optional.panel_of_normals | |
| 29 #if $optional.panel_of_normals.is_of_type("vcf_bgzip") | |
| 30 ln -s '$optional.panel_of_normals' panel_of_normals.vcf.gz && | |
| 31 tabix panel_of_normals.vcf.gz && | |
| 32 #else | |
| 33 ln -s '$optional.panel_of_normals' panel_of_normals.vcf && | |
| 34 #end if | |
| 35 #end if | |
| 36 | |
| 37 #if $optional.germline_resource | |
| 38 #if $optional.germline_resource.is_of_type("vcf_bgzip") | |
| 39 ln -s '$optional.germline_resource' germline_resource.vcf.gz && | |
| 40 tabix germline_resource.vcf.gz && | |
| 41 #else | |
| 42 ln -s '$optional.germline_resource' germline_resource.vcf && | |
| 43 #end if | |
| 44 #end if | |
| 45 | |
| 46 #if $optional.alleles | |
| 47 #if $optional.alleles.is_of_type("vcf_bgzip") | |
| 48 ln -s '$optional.alleles' alleles.vcf.gz && | |
| 49 tabix alleles.vcf.gz && | |
| 50 #else | |
| 51 ln -s '$optional.alleles' alleles.vcf && | |
| 52 #end if | |
| 53 #end if | |
| 54 #end if | |
| 55 ## END STAGE OPTIONAL VCF RESOURCES ## | |
| 56 | |
| 57 @CMD_BEGIN@ Mutect2 | |
| 58 ## Everything rendered from here to the end of the template is an argument of the single Mutect2 invocation above ## | |
| 59 #include source=$gatk_bam_input# | |
| 60 | |
| 61 ## COMMON PARAMETERS ## | |
| 62 | |
| 63 #if str($common.common_parameters) == 'yes' | |
| 64 | |
| 65 --read-filter="$common.read_filter" | |
| 66 --seconds-between-progress-updates="$common.seconds_between_progress_updates" | |
| 67 --verbosity="$common.verbosity" | |
| 68 --read-validation-stringency="$common.read_validation_stringency" | |
| 69 --interval-set-rule="$common.interval_set_rule" | |
| 70 --gatk-config-file="$common.gatk_config_file" | |
| 71 --disable-read-filter="$common.disable_read_filter" | |
| 72 $common.QUIET | |
| 73 $common.use_jdk_deflater | |
| 74 $common.use_jdk_inflater | |
| 75 $common.lenient | |
| 76 $common.disable_tool_default_read_filters | |
| 77 $common.add_output_sam_program_record | |
| 78 $common.add_output_vcf_command_line | |
| 79 | |
| 80 #end if | |
| 81 | |
| 82 ## END COMMON PARAMETERS ## | |
| 83 | |
| 84 ## OPTIONAL PARAMETERS ## | |
| 85 | |
| 86 #if str($optional.optional_parameters) == 'yes' | |
| 87 | |
| 88 #if $optional.panel_of_normals | |
| 89 #if $optional.panel_of_normals.is_of_type("vcf_bgzip") | |
| 90 --panel-of-normals panel_of_normals.vcf.gz | |
| 91 #else | |
| 92 --panel-of-normals panel_of_normals.vcf | |
| 93 #end if | |
| 94 #end if | |
| 95 | |
| 96 #if $optional.pedigree | |
| 97 --pedigree="$optional.pedigree" | |
| 98 #end if | |
| 99 | |
| 100 #if $optional.alleles | |
| 101 #if $optional.alleles.is_of_type("vcf_bgzip") | |
| 102 --alleles alleles.vcf.gz | |
| 103 #else | |
| 104 --alleles alleles.vcf | |
| 105 #end if | |
| 106 #end if | |
| 107 | |
| 108 #if $optional.germline_resource | |
| 109 #if $optional.germline_resource.is_of_type("vcf_bgzip") | |
| 110 --germline-resource germline_resource.vcf.gz | |
| 111 #else | |
| 112 --germline-resource germline_resource.vcf | |
| 113 #end if | |
| 114 #end if | |
| 115 | |
| 116 #if $optional.population_callset | |
| 117 #if $optional.population_callset.is_of_type("vcf_bgzip") | |
| 118 --population-callset population_callset.vcf.gz | |
| 119 #else | |
| 120 --population-callset population_callset.vcf | |
| 121 #end if | |
| 122 #end if | |
| 123 | |
| 124 #if $optional.arguments_file | |
| 125 --arguments_file="$optional.arguments_file" | |
| 126 #end if | |
| 127 | |
| 128 --base-quality-score-threshold="$optional.base_quality_score_threshold" | |
| 129 --cloud-index-prefetch-buffer="$optional.cloud_index_prefetch_buffer" | |
| 130 --cloud-prefetch-buffer="$optional.cloud_prefetch_buffer" | |
| 131 --annotation="$optional.annotation" | |
| 132 --annotation-group="$optional.annotation_group" | |
| 133 --annotations-to-exclude="$optional.annotations_to_exclude" | |
| 134 --af-of-alleles-not-in-resource="$optional.af_of_alleles_not_in_resource" | |
| 135 --contamination-fraction-to-filter="$optional.contamination_fraction_to_filter" | |
| 136 --downsampling-stride="$optional.downsampling_stride" | |
| 137 --founder-id="$optional.founder_id" | |
| 138 --gcs-max-retries="$optional.gcs_max_retries" | |
| 139 --genotyping-mode="$optional.genotyping_mode" | |
| 140 --heterozygosity="$optional.heterozygosity" | |
| 141 --heterozygosity-stdev="$optional.heterozygosity_stdev" | |
| 142 --indel-heterozygosity="$optional.indel_heterozygosity" | |
| 143 --initial-tumor-lod="$optional.initial_tumor_lod" | |
| 144 --interval-merging-rule="$optional.interval_merging_rule" | |
| 145 --max-population-af="$optional.max_population_af" | |
| 146 --max-reads-per-alignment-start="$optional.max_reads_per_alignment_start" | |
| 147 --min-base-quality-score="$optional.min_base_quality_score" | |
| 148 --native-pair-hmm-threads="$optional.native_pair_hmm_threads" | |
| 149 --normal-lod="$optional.normal_lod" | |
| 150 --normal-sample="$optional.normal_sample" | |
| 151 --num-reference-samples-if-no-call="$optional.num_reference_samples_if_no_call" | |
| 152 --output-mode="$optional.output_mode" | |
| 153 --sample-ploidy="$optional.sample_ploidy" | |
| 154 --standard-min-confidence-threshold-for-calling="$optional.standard_min_confidence_threshold_for_calling" | |
| 155 --tumor-lod-to-emit="$optional.tumor_lod_to_emit" | |
| 156 $optional.annotate_with_num_discovered_alleles | |
| 157 $optional.disable_bam_index_caching | |
| 158 $optional.disable_sequence_dictionary_validation | |
| 159 $optional.genotype_germline_sites | |
| 160 $optional.genotype_pon_sites | |
| 161 $optional.native_pair_hmm_use_double_precision | |
| 162 $optional.sites_only_vcf_output | |
| 163 $optional.use_new_qual_calculator | |
| 164 #end if | |
| 165 | |
| 166 ## END OPTIONAL PARAMETERS ## | |
| 167 | |
| 168 ## ADVANCED PARAMETERS ## | |
| 169 | |
| 170 #if str($advanced.advanced_parameters) == 'yes' | |
| 171 --active-probability-threshold="$advanced.active_probability_threshold" | |
| 172 --assembly-region-padding="$advanced.assembly_region_padding" | |
| 173 --bam-writer-type="$advanced.bam_writer_type" | |
| 174 --contamination-fraction-per-sample-file="$advanced.contamination_fraction_per_sample_file" | |
| 175 --input-prior="$advanced.input_prior" | |
| 176 --kmer-size="$advanced.kmer_size" | |
| 177 --max-alternate-alleles="$advanced.max_alternate_alleles" | |
| 178 --max-assembly-region-size="$advanced.max_assembly_region_size" | |
| 179 --max-genotype-count="$advanced.max_genotype_count" | |
| 180 --max-mnp-distance="$advanced.max_mnp_distance" | |
| 181 --max-num-haplotypes-in-population="$advanced.max_num_haplotypes_in_population" | |
| 182 --max-prob-propagation-distance="$advanced.max_prob_propagation_distance" | |
| 183 --max-suspicious-reads-per-alignment-start="$advanced.max_suspicious_reads_per_alignment_start" | |
| 184 --min-assembly-region-size="$advanced.min_assembly_region_size" | |
| 185 --min-dangling-branch-length="$advanced.min_dangling_branch_length" | |
| 186 --min-pruning="$advanced.min_pruning" | |
| 187 --num-pruning-samples="$advanced.num_pruning_samples" | |
| 188 --pair-hmm-gap-continuation-penalty="$advanced.pair_hmm_gap_continuation_penalty" | |
| 189 --pair-hmm-implementation="$advanced.pair_hmm_implementation" | |
| 190 --pcr-indel-model="$advanced.pcr_indel_model" | |
| 191 --phred-scaled-global-read-mismapping-rate="$advanced.phred_scaled_global_read_mismapping_rate" | |
| 192 --smith-waterman="$advanced.smith_waterman" | |
| 193 $advanced.all_site_pls | |
| 194 $advanced.allow_non_unique_kmers_in_ref | |
| 195 $advanced.consensus | |
| 196 $advanced.debug | |
| 197 $advanced.disable_tool_default_annotations | |
| 198 $advanced.do_not_run_physical_phasing | |
| 199 $advanced.dont_increase_kmer_sizes_for_cycles | |
| 200 $advanced.dont_trim_active_regions | |
| 201 $advanced.dont_use_soft_clipped_bases | |
| 202 $advanced.enable_all_annotations | |
| 203 $advanced.genotype_filtered_alleles | |
| 204 $advanced.use_filtered_reads_for_annotations | |
| 205 | |
| 206 #end if | |
| 207 | |
| 208 ## END ADVANCED PARAMETERS ## | |
| 209 | |
| 210 ## ADDITIONAL OUTPUT PARAMETERS ## | |
| 211 ## NOTE(review): these outputs are gated on the advanced switch rather than a switch of their own; confirm that is intended ## | |
| 212 #if str($advanced.advanced_parameters) == 'yes' | |
| 213 | |
| 214 --activity-profile-out="$activity_profile_out" | |
| 215 --assembly-region-out="$assembly_region_out" | |
| 216 --bam-output="$bam_output" | |
| 217 --graph-output="$graph_output" | |
| 218 | |
| 219 #end if | |
| 220 | |
| 221 #include source=$gatk_excl_ints_chth# | |
| 222 #include source=$gatk_ints_chth# | |
| 223 #include source=$vcf_output_opts# | |
| 224 #include source=$ref_opts# | |
| 225 #include source=$gatk_seqdict# | |
| 226 #if $tumor_sample | |
| 227 --tumor-sample="$tumor_sample" | |
| 228 #end if | |
| 229 ]]> | |
| 230 </command> | |
| 231 <inputs> | |
| 232 <expand macro="gatk_bam_req_params"/> <!-- macro body is not visible here; presumably supplied by the imported macros.xml --> | |
| 233 <expand macro="gzip_vcf_params"/> <!-- macro body is not visible here; presumably supplied by the imported macros.xml --> | |
| 234 <expand macro="ref_sel"/> <!-- presumably declares reference_source, which the command template reads as reference_source.reference_sequence; confirm in macros.xml --> | |
| 235 <param name="tumor_sample" argument="--tumor-sample" type="text" optional="false" value="" label="Tumor Sample" help="BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument."/> <!-- the command template passes this as the tumor-sample flag only when the value is non-empty --> | |
| 236 <conditional name="common"> <!-- Switchable group of common options: the command template emits the flags of its COMMON PARAMETERS section only when common_parameters is "yes". --> | |
| 237 <param name="common_parameters" type="select" label="Common parameters"> | |
| 238 <option value="no">Use internal defaults</option> | |
| 239 <option value="yes">Specify parameters</option> | |
| 240 </param> | |
| 241 <when value="yes"> | |
| 242 <expand macro="gatk_excl_ints"/> <!-- macro body is not visible here; presumably supplied by the imported macros.xml --> | |
| 243 <expand macro="seq_dict_sel"/> <!-- macro body is not visible here; presumably supplied by the imported macros.xml --> | |
| 244 <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/> | |
| 245 <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/> | |
| 246 <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/> | |
| 247 <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters" type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/> | |
| 248 <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="" label="Gatk Config File" help="A configuration file to use with the GATK."/> <!-- NOTE(review): format is empty; confirm which datatypes should be accepted --> | |
| 249 <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs"> | |
| 250 <option selected="true" value="UNION">UNION</option> | |
| 251 <option selected="false" value="INTERSECTION">INTERSECTION</option> | |
| 252 </param> | |
| 253 <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/> | |
| 254 <param name="QUIET" argument="--QUIET" type="boolean" truevalue="--QUIET" falsevalue="" optional="true" checked="false" label="Quiet" help="Whether to suppress job-summary info on System.err."/> | |
| 255 <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/> | |
| 256 <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded."> | |
| 257 <option selected="false" value="STRICT">STRICT</option> | |
| 258 <option selected="false" value="LENIENT">LENIENT</option> | |
| 259 <option selected="true" value="SILENT">SILENT</option> | |
| 260 </param> | |
| 261 <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/> | |
| 262 <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/> | |
| 263 <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/> | |
| 264 <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging."> | |
| 265 <option selected="false" value="ERROR">ERROR</option> | |
| 266 <option selected="false" value="WARNING">WARNING</option> | |
| 267 <option selected="true" value="INFO">INFO</option> | |
| 268 <option selected="false" value="DEBUG">DEBUG</option> | |
| 269 </param> | |
| 270 </when> | |
| 271 <when value="no" /> <!-- this branch declares no inputs --> | |
| 272 </conditional> | |
| 273 <conditional name="optional"> <!-- Switchable group of optional Mutect2 options: the command template reads these values only when optional_parameters is "yes". --> | |
| 274 <param name="optional_parameters" type="select" label="Optional parameters"> | |
| 275 <option value="no">Use internal defaults</option> | |
| 276 <option value="yes">Specify parameters</option> | |
| 277 </param> | |
| 278 <when value="yes"> | |
| 279 <expand macro="gatk_ints"/> <!-- macro body is not visible here; presumably supplied by the imported macros.xml --> | |
| 280 <param name="af_of_alleles_not_in_resource" argument="--af-of-alleles-not-in-resource" type="float" optional="true" value="-1.0" label="Af Of Alleles Not In Resource" help="Population allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for a derivation of the default value."/> | |
| 281 <param name="alleles" argument="--alleles" type="data" optional="true" format="" label="Alleles" help="The set of alleles at which to genotype when --genotyping-mode is GENOTYPE_GIVEN_ALLELES"/> <!-- the command template links this dataset as alleles.vcf.gz when it is vcf_bgzip, otherwise as alleles.vcf; NOTE(review): format is empty, confirm accepted datatypes --> | |
| 282 <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/> | |
| 283 <param name="annotation" argument="--annotation" type="text" optional="true" value="" label="Annotation" help="One or more specific annotations to add to variant calls"/> | |
| 284 <param name="annotation_group" argument="--annotation-group" type="text" optional="true" value="" label="Annotation Group" help="One or more groups of annotations to apply to variant calls"/> | |
| 285 <param name="annotations_to_exclude" argument="--annotations-to-exclude" type="text" optional="true" value="" label="Annotations To Exclude" help="One or more specific annotations to exclude from variant calls"/> | |
| 286 <param name="arguments_file" argument="--arguments_file" type="data" optional="true" format="" label="Arguments_File" help="read one or more arguments files and add them to the command line"/> | |
| 287 <param name="base_quality_score_threshold" argument="--base-quality-score-threshold" type="integer" optional="true" value="18" label="Base Quality Score Threshold" help="Base qualities below this threshold will be reduced to the minimum (6)"/> | |
| 288 <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset."/> | |
| 289 <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/> | |
| 290 <param name="contamination_fraction_to_filter" argument="--contamination-fraction-to-filter" type="float" optional="true" value="0.0" label="Contamination Fraction To Filter" help="Fraction of contamination in sequencing data (for all samples) to aggressively remove"/> | |
| 291 <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don&apos;t cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified."/> | |
| 292 <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk!"/> | |
| 293 <param name="downsampling_stride" argument="--downsampling-stride" type="integer" optional="true" value="1" label="Downsampling Stride" help="Downsample a pool of reads starting within a range of one or more bases."/> | |
| 294 <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &quot;founders&quot;"/> | |
| 295 <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/> | |
| 296 <param name="genotype_germline_sites" argument="--genotype-germline-sites" type="boolean" truevalue="--genotype-germline-sites" falsevalue="" optional="true" checked="false" label="Genotype Germline Sites" help="(EXPERIMENTAL) Call all apparent germline site even though they will ultimately be filtered."/> | |
| 297 <param name="genotype_pon_sites" argument="--genotype-pon-sites" type="boolean" truevalue="--genotype-pon-sites" falsevalue="" optional="true" checked="false" label="Genotype Pon Sites" help="Call sites in the PoN even though they will ultimately be filtered."/> | |
| 298 <param name="genotyping_mode" argument="--genotyping-mode" type="select" optional="true" label="Genotyping Mode" help="Specifies how to determine the alternate alleles to use for genotyping"> | |
| 299 <option selected="true" value="DISCOVERY">DISCOVERY</option> | |
| 300 <option selected="false" value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option> | |
| 301 </param> | |
| 302 <param name="germline_resource" argument="--germline-resource" type="data" optional="true" format="" label="Germline Resource" help="Population vcf of germline sequencing containing allele fractions."/> | |
| 303 <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept"/> | |
| 304 <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/> | |
| 305 <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/> | |
| 306 <param name="initial_tumor_lod" argument="--initial-tumor-lod" type="float" optional="true" value="2.0" label="Initial Tumor Lod" help="LOD threshold to consider pileup active."/> | |
| 307 <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals"> | |
| 308 <option selected="true" value="ALL">ALL</option> | |
| 309 <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option> | |
| 310 </param> | |
| 311 <param name="max_population_af" argument="--max-population-af" type="float" optional="true" value="0.01" label="Max Population Af" help="Maximum population allele frequency in tumor-only mode."/> | |
| 312 <param name="max_reads_per_alignment_start" argument="--max-reads-per-alignment-start" type="integer" optional="true" value="50" label="Max Reads Per Alignment Start" help="Maximum number of reads to retain per alignment start position. Reads above this threshold will be downsampled. Set to 0 to disable."/> | |
| 313 <param name="min_base_quality_score" argument="--min-base-quality-score" type="integer" optional="true" value="10" label="Min Base Quality Score" help="Minimum base quality required to consider a base for calling"/> | |
| 314 <param name="native_pair_hmm_threads" argument="--native-pair-hmm-threads" type="integer" optional="true" value="4" label="Native Pair Hmm Threads" help="How many threads should a native pairHMM implementation use"/> | |
| 315 <param name="native_pair_hmm_use_double_precision" argument="--native-pair-hmm-use-double-precision" type="boolean" truevalue="--native-pair-hmm-use-double-precision" falsevalue="" optional="true" checked="false" label="Native Pair Hmm Use Double Precision" help="use double precision in the native pairHmm. This is slower but matches the java implementation better"/> | |
| 316 <param name="normal_lod" argument="--normal-lod" type="float" optional="true" value="2.2" label="Normal Lod" help="LOD threshold for calling normal variant non-germline."/> | |
| 317 <param name="normal_sample" argument="--normal-sample" type="text" optional="true" value="" label="Normal Sample" help="BAM sample name of normal. May be URL-encoded as output by GetSampleName with -encode argument."/> | |
| 318 <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/> | |
| 319 <param name="output_mode" argument="--output-mode" type="select" optional="true" label="Output Mode" help="Specifies which type of calls we should output"> | |
| 320 <option selected="true" value="EMIT_VARIANTS_ONLY">EMIT_VARIANTS_ONLY</option> | |
| 321 <option selected="false" value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option> | |
| 322 <option selected="false" value="EMIT_ALL_SITES">EMIT_ALL_SITES</option> | |
| 323 </param> | |
| 324 <param name="panel_of_normals" argument="--panel-of-normals" type="data" optional="true" format="" label="Panel Of Normals" help="VCF file of sites observed in normal."/> | |
| 325 <param name="pedigree" argument="--pedigree" type="data" optional="true" format="" label="Pedigree" help="Pedigree file for determining the population &quot;founders&quot;"/> | |
| 326 <param name="population_callset" argument="--population-callset" type="data" optional="true" format="" label="Population Callset" help="Callset to use in calculating genotype priors"/> | |
| 327 <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/> | |
| 328 <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don&apos;t emit genotype fields when writing vcf file output."/> | |
| 329 <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/> | |
| 330 <param name="tumor_lod_to_emit" argument="--tumor-lod-to-emit" type="float" optional="true" value="3.0" label="Tumor Lod To Emit" help="LOD threshold to emit tumor variant to VCF."/> | |
| 331 <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/> | |
| 332 </when> | |
| 333 <when value="no" /> <!-- this branch declares no inputs --> | |
| 334 </conditional> | |
| 335 <conditional name="advanced"> | |
| 336 <param name="advanced_parameters" type="select" label="Advanced parameters"> | |
| 337 <option value="no">Use internal defaults</option> | |
| 338 <option value="yes">Specify parameters</option> | |
| 339 </param> | |
| 340 <when value="yes"> | |
| 341 <param name="active_probability_threshold" argument="--active-probability-threshold" type="float" optional="true" value="0.002" label="Active Probability Threshold" help="Minimum probability for a locus to be considered active."/> | |
| 342 <param name="all_site_pls" argument="--all-site-pls" type="boolean" truevalue="--all-site-pls" falsevalue="" optional="true" checked="false" label="All Site Pls" help="Annotate all sites with PLs"/> | |
| 343 <param name="allow_non_unique_kmers_in_ref" argument="--allow-non-unique-kmers-in-ref" type="boolean" truevalue="--allow-non-unique-kmers-in-ref" falsevalue="" optional="true" checked="false" label="Allow Non Unique Kmers In Ref" help="Allow graphs that have non-unique kmers in the reference"/> | |
| 344 <param name="assembly_region_padding" argument="--assembly-region-padding" type="integer" optional="true" value="100" label="Assembly Region Padding" help="Number of additional bases of context to include around each assembly region"/> | |
| 345 <param name="bam_writer_type" argument="--bam-writer-type" type="select" optional="true" label="Bam Writer Type" help="Which haplotypes should be written to the BAM"> | |
| 346 <option selected="false" value="ALL_POSSIBLE_HAPLOTYPES">ALL_POSSIBLE_HAPLOTYPES</option> | |
| 347 <option selected="true" value="CALLED_HAPLOTYPES">CALLED_HAPLOTYPES</option> | |
| 348 </param> | |
| 349 <param name="consensus" argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" optional="true" checked="false" label="Consensus" help="1000G consensus mode"/> | |
| 350 <param name="contamination_fraction_per_sample_file" argument="--contamination-fraction-per-sample-file" type="data" optional="true" format="" label="Contamination Fraction Per Sample File" help="Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be &quot;&lt;SampleID&gt;&lt;TAB&gt;&lt;Contamination&gt;&quot; (Contamination is double) per line; No header."/> | |
| 351 <param name="debug" argument="--debug" type="boolean" truevalue="--debug" falsevalue="" optional="true" checked="false" label="Debug" help="Print out very verbose debug information about each triggering active region"/> | |
| 352 <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/> | |
| 353 <param name="do_not_run_physical_phasing" argument="--do-not-run-physical-phasing" type="boolean" truevalue="--do-not-run-physical-phasing" falsevalue="" optional="true" checked="false" label="Do Not Run Physical Phasing" help="Disable physical phasing"/> | |
| 354 <param name="dont_increase_kmer_sizes_for_cycles" argument="--dont-increase-kmer-sizes-for-cycles" type="boolean" truevalue="--dont-increase-kmer-sizes-for-cycles" falsevalue="" optional="true" checked="false" label="Dont Increase Kmer Sizes For Cycles" help="Disable iterating over kmer sizes when graph cycles are detected"/> | |
| 355 <param name="dont_trim_active_regions" argument="--dont-trim-active-regions" type="boolean" truevalue="--dont-trim-active-regions" falsevalue="" optional="true" checked="false" label="Dont Trim Active Regions" help="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping"/> | |
| 356 <param name="dont_use_soft_clipped_bases" argument="--dont-use-soft-clipped-bases" type="boolean" truevalue="--dont-use-soft-clipped-bases" falsevalue="" optional="true" checked="false" label="Dont Use Soft Clipped Bases" help="Do not analyze soft clipped bases in the reads"/> | |
| 357 <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/> | |
| 358 <param name="genotype_filtered_alleles" argument="--genotype-filtered-alleles" type="boolean" truevalue="--genotype-filtered-alleles" falsevalue="" optional="true" checked="false" label="Genotype Filtered Alleles" help="Whether to genotype all given alleles, even filtered ones, --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/> | |
| 359 <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/> | |
| 360 <param name="kmer_size" argument="--kmer-size" type="integer" optional="true" value="" label="Kmer Size" help="Kmer size to use in the read threading assembler"/> | |
| 361 <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/> | |
| 362 <param name="max_assembly_region_size" argument="--max-assembly-region-size" type="integer" optional="true" value="300" label="Max Assembly Region Size" help="Maximum size of an assembly region"/> | |
| 363 <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/> | |
| 364 <param name="max_mnp_distance" argument="--max-mnp-distance" type="integer" optional="true" value="1" label="Max Mnp Distance" help="Two or more phased substitutions separated by this distance or less are merged into MNPs."/> | |
| 365 <param name="max_num_haplotypes_in_population" argument="--max-num-haplotypes-in-population" type="integer" optional="true" value="128" label="Max Num Haplotypes In Population" help="Maximum number of haplotypes to consider for your population"/> | |
| 366 <param name="max_prob_propagation_distance" argument="--max-prob-propagation-distance" type="integer" optional="true" value="50" label="Max Prob Propagation Distance" help="Upper limit on how many bases away probability mass can be moved around when calculating the boundaries between active and inactive assembly regions"/> | |
| 367 <param name="max_suspicious_reads_per_alignment_start" argument="--max-suspicious-reads-per-alignment-start" type="integer" optional="true" value="0" label="Max Suspicious Reads Per Alignment Start" help="Maximum number of suspicious reads (mediocre mapping quality or too many substitutions) allowed in a downsampling stride. Set to 0 to disable."/> | |
| 368 <param name="min_assembly_region_size" argument="--min-assembly-region-size" type="integer" optional="true" value="50" label="Min Assembly Region Size" help="Minimum size of an assembly region"/> | |
| 369 <param name="min_dangling_branch_length" argument="--min-dangling-branch-length" type="integer" optional="true" value="4" label="Min Dangling Branch Length" help="Minimum length of a dangling branch to attempt recovery"/> | |
| 370 <param name="min_pruning" argument="--min-pruning" type="integer" optional="true" value="2" label="Min Pruning" help="Minimum support to not prune paths in the graph"/> | |
| 371 <param name="num_pruning_samples" argument="--num-pruning-samples" type="integer" optional="true" value="1" label="Num Pruning Samples" help="Number of samples that must pass the minPruning threshold"/> | |
| 372 <param name="pair_hmm_gap_continuation_penalty" argument="--pair-hmm-gap-continuation-penalty" type="integer" optional="true" value="10" label="Pair Hmm Gap Continuation Penalty" help="Flat gap continuation penalty for use in the Pair HMM"/> | |
| 373 <param name="pair_hmm_implementation" argument="--pair-hmm-implementation" type="select" optional="true" label="Pair Hmm Implementation" help="The PairHMM implementation to use for genotype likelihood calculations"> | |
| 374 <option selected="false" value="EXACT">EXACT</option> | |
| 375 <option selected="false" value="ORIGINAL">ORIGINAL</option> | |
| 376 <option selected="false" value="LOGLESS_CACHING">LOGLESS_CACHING</option> | |
| 377 <option selected="false" value="AVX_LOGLESS_CACHING">AVX_LOGLESS_CACHING</option> | |
| 378 <option selected="false" value="AVX_LOGLESS_CACHING_OMP">AVX_LOGLESS_CACHING_OMP</option> | |
| 379 <option selected="false" value="EXPERIMENTAL_FPGA_LOGLESS_CACHING">EXPERIMENTAL_FPGA_LOGLESS_CACHING</option> | |
| 380 <option selected="true" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option> | |
| 381 </param> | |
| 382 <param name="pcr_indel_model" argument="--pcr-indel-model" type="select" optional="true" label="Pcr Indel Model" help="The PCR indel model to use"> | |
| 383 <option selected="false" value="NONE">NONE</option> | |
| 384 <option selected="false" value="HOSTILE">HOSTILE</option> | |
| 385 <option selected="false" value="AGGRESSIVE">AGGRESSIVE</option> | |
| 386 <option selected="true" value="CONSERVATIVE">CONSERVATIVE</option> | |
| 387 </param> | |
| 388 <param name="phred_scaled_global_read_mismapping_rate" argument="--phred-scaled-global-read-mismapping-rate" type="integer" optional="true" value="45" label="Phred Scaled Global Read Mismapping Rate" help="The global assumed mismapping rate for reads"/> | |
| 389 <param name="smith_waterman" argument="--smith-waterman" type="select" optional="true" label="Smith Waterman" help="Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is the right choice"> | |
| 390 <option selected="false" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option> | |
| 391 <option selected="false" value="AVX_ENABLED">AVX_ENABLED</option> | |
| 392 <option selected="true" value="JAVA">JAVA</option> | |
| 393 </param> | |
| 394 <param name="use_filtered_reads_for_annotations" argument="--use-filtered-reads-for-annotations" type="boolean" truevalue="--use-filtered-reads-for-annotations" falsevalue="" optional="true" checked="false" label="Use Filtered Reads For Annotations" help="Use the contamination-filtered read maps for the purposes of annotating variants"/> | |
| 395 </when> | |
| 396 <when value="no" /> | |
| 397 </conditional> | |
| 398 <conditional name="output"> <!-- Optional extra outputs: choosing yes reveals four checkboxes, all unchecked by default; each checkbox name matches a data element in outputs once the _sel suffix is removed --> | |
| 399 <param name="output_parameters" type="select" label="Output parameters"> | |
| 400 <option value="no">Use internal defaults</option> | |
| 401 <option value="yes">Specify parameters</option> | |
| 402 </param> | |
| 403 <when value="yes"> <!-- Each boolean below yields its truevalue string when ticked and an empty string otherwise --> | |
| 404 <param name="activity_profile_out_sel" argument="--activity_profile_out_sel" type="boolean" truevalue="--activity_profile_out_sel" falsevalue="" optional="true" checked="false" label="Activity Profile Out" help="Output the raw activity profile results in IGV format"/> | |
| 405 <param name="assembly_region_out_sel" argument="--assembly_region_out_sel" type="boolean" truevalue="--assembly_region_out_sel" falsevalue="" optional="true" checked="false" label="Assembly Region Out" help="Output the assembly region to this IGV formatted file"/> | |
| 406 <param name="bam_output_sel" argument="--bam_output_sel" type="boolean" truevalue="--bam_output_sel" falsevalue="" optional="true" checked="false" label="Bam Output" help="File to which assembled haplotypes should be written"/> | |
| 407 <param name="graph_output_sel" argument="--graph_output_sel" type="boolean" truevalue="--graph_output_sel" falsevalue="" optional="true" checked="false" label="Graph Output" help="Write debug assembly graph information to this file"/> | |
| 408 </when> | |
| 409 <when value="no" /> <!-- No additional parameters are defined for this branch --> | |
| 410 </conditional> | |
| 411 </inputs> | |
| 412 <outputs> <!-- Each optional dataset below is created only when the output conditional is set to yes and the matching _sel checkbox is ticked; the explicit output_parameters check keeps the filter from failing on a missing key when the no branch is active --> | |
| 413 <expand macro="gzip_vcf_output_params"/> | |
| 414 <data format="tabular" name="activity_profile_out" label="${tool.name} on ${on_string}: activity_profile_out tabular"> | |
| 415 <filter>output['output_parameters'] == 'yes' and output['activity_profile_out_sel']</filter> | |
| 416 </data> | |
| 417 <data format="tabular" name="assembly_region_out" label="${tool.name} on ${on_string}: assembly_region_out tabular"> | |
| 418 <filter>output['output_parameters'] == 'yes' and output['assembly_region_out_sel']</filter> | |
| 419 </data> | |
| 420 <data format="bam" name="bam_output" label="${tool.name} on ${on_string}: bam_output bam"> | |
| 421 <filter>output['output_parameters'] == 'yes' and output['bam_output_sel']</filter> | |
| 422 </data> | |
| 423 <data format="txt" name="graph_output" label="${tool.name} on ${on_string}: graph_output txt"> | |
| 424 <filter>output['output_parameters'] == 'yes' and output['graph_output_sel']</filter> | |
| 425 </data> | |
| 426 </outputs> | |
| 427 <tests> | |
| 428 <test /> | |
| 429 </tests> | |
| 430 <help><![CDATA[Call somatic short variants via local assembly of haplotypes. Short | |
| 431 variants include single nucleotide (SNV) and insertion and deletion | |
| 432 (indel) variants. The caller combines the DREAM challenge-winning | |
| 433 somatic genotyping engine of the original MuTect (`Cibulskis et al., | |
| 434 2013 <http://www.nature.com/nbt/journal/v31/n3/full/nbt.2514.html>`__) | |
| 435 with the assembly-based machinery of | |
| 436 `HaplotypeCaller <https://www.broadinstitute.org/gatk/documentation/tooldocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php>`__. | |
| 437 | |
| 438 This tool is featured in the *Somatic Short Mutation calling Best | |
| 439 Practice Workflow*. See | |
| 440 `Tutorial#11136 <https://software.broadinstitute.org/gatk/documentation/article?id=11136>`__ | |
| 441 for a step-by-step description of the workflow and | |
| 442 `Article#11127 <https://software.broadinstitute.org/gatk/documentation/article?id=11127>`__ | |
| 443 for an overview of what traditional somatic calling entails. For the | |
| 444 latest pipeline scripts, see the `Mutect2 WDL scripts | |
| 445 directory <https://github.com/broadinstitute/gatk/tree/master/scripts/mutect2_wdl>`__. | |
| 446 Although we present the tool for somatic calling, it may apply to other | |
| 447 contexts, such as mitochondrial variant calling. | |
| 448 | |
| 449 Usage examples | |
| 450 ~~~~~~~~~~~~~~ | |
| 451 | |
| 452 Example commands show how to run Mutect2 for typical scenarios. The two | |
| 453 modes are (i) *somatic mode* where a tumor sample is matched with a | |
| 454 normal sample in analysis and (ii) *tumor-only mode* where a single | |
| 455 sample's alignment data undergoes analysis. | |
| 456 | |
| 457 (i) Tumor with matched normal | |
| 458 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 459 | |
| 460 Given a matched normal, Mutect2 is designed to call somatic variants | |
| 461 only. The tool includes logic to skip emitting variants that are clearly | |
| 462 present in the germline based on provided evidence, e.g. in the matched | |
| 463 normal. This is done at an early stage to avoid spending computational | |
| 464 resources on germline events. If the variant's germline status is | |
| 465 borderline, then Mutect2 will emit the variant to the callset for | |
| 466 subsequent filtering and review. | |
| 467 | |
| 468 :: | |
| 469 | |
| 470 gatk Mutect2 \ | |
| 471 -R reference.fa \ | |
| 472 -I tumor.bam \ | |
| 473 -tumor tumor_sample_name \ | |
| 474 -I normal.bam \ | |
| 475 -normal normal_sample_name \ | |
| 476 --germline-resource af-only-gnomad.vcf.gz \ | |
| 477 --af-of-alleles-not-in-resource 0.00003125 \ | |
| 478 --panel-of-normals pon.vcf.gz \ | |
| 479 -O somatic.vcf.gz | |
| 480 | |
| 481 | |
| 482 The --af-of-alleles-not-in-resource argument value should match | |
| 483 expectations for alleles not found in the provided germline resource. | |
| 484 Note the tool does not require a germline resource nor a panel of | |
| 485 normals (PoN) to run. The tool prefilters sites for the matched normal | |
| 486 and the PoN. For the germline resource, the tool prefilters on the | |
| 487 allele. Below is an excerpt of a known variants resource with population | |
| 488 allele frequencies: | |
| 489 | |
| 490 :: | |
| 491 | |
| 492 #CHROM POS ID REF ALT QUAL FILTER INFO | |
| 493 1 10067 . T TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 30.35 PASS AC=3;AF=7.384E-5 | |
| 494 1 10108 . CAACCCT C 46514.32 PASS AC=6;AF=1.525E-4 | |
| 495 1 10109 . AACCCTAACCCT AAACCCT,* 89837.27 PASS AC=48,5;AF=0.001223,1.273E-4 | |
| 496 1 10114 . TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA *,CAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA,T 36728.97 PASS AC=55,9,1;AF=0.001373,2.246E-4,2.496E-5 | |
| 497 1 10119 . CT C,* 251.23 PASS AC=5,1;AF=1.249E-4,2.498E-5 | |
| 498 1 10120 . TA CA,* 14928.74 PASS AC=10,6;AF=2.5E-4,1.5E-4 | |
| 499 1 10128 . ACCCTAACCCTAACCCTAAC A,* 285.71 PASS AC=3,1;AF=7.58E-5,2.527E-5 | |
| 500 1 10131 . CT C,* 378.93 PASS AC=7,5;AF=1.765E-4,1.261E-4 | |
| 501 1 10132 . TAACCC *,T 18025.11 PASS AC=12,2;AF=3.03E-4,5.049E-5 | |
| 502 | |
| 503 | |
| 504 (ii) Tumor-only mode | |
| 505 ^^^^^^^^^^^^^^^^^^^^ | |
| 506 | |
| 507 This mode runs on a single sample, e.g. single tumor or single normal | |
| 508 sample. To create a PoN, call on each normal sample in this mode, then | |
| 509 use CreateSomaticPanelOfNormals to generate the PoN. | |
| 510 | |
| 511 :: | |
| 512 | |
| 513 gatk Mutect2 \ | |
| 514 -R reference.fa \ | |
| 515 -I sample.bam \ | |
| 516 -tumor sample_name \ | |
| 517 -O single_sample.vcf.gz | |
| 518 | |
| 519 | |
| 520 Further points of interest | |
| 521 ~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 522 | |
| 523 Additional parameters that factor towards filtering, including | |
| 524 normal-artifact-lod (default threshold 0.0) and tumor-lod (default | |
| 525 threshold 5.3), are available in FilterMutectCalls. While the tool | |
| 526 calculates normal-lod assuming a diploid genotype, it calculates | |
| 527 normal-artifact-lod with the same approach it uses for tumor-lod, i.e. | |
| 528 with a variable ploidy assumption. | |
| 529 | |
| 530 - If the normal artifact log odds becomes large, then FilterMutectCalls applies the artifact-in-normal filter. For matched normal samples with tumor contamination, consider increasing the normal-artifact-lod threshold. | |
| 531 | |
| 532 - The tumor log odds, which is calculated independently of any matched normal, determines whether to filter a tumor variant. Variants with tumor LODs exceeding the threshold pass filtering. | |
| 533 | |
| 534 | |
| 535 If a variant is absent from a given germline resource, then the value | |
| 536 for --af-of-alleles-not-in-resource applies. For example, gnomAD's | |
| 537 16,000 samples (~32,000 homologs per locus) correspond to a probability of one | |
| 538 in 32,000 or less (1/32,000 = 0.00003125, the value used in the example above). Thus, an allele's absence from the germline resource | |
| 539 becomes evidence that it is not a germline variant. | |
| 540 | |
| 541 Caveats | |
| 542 ~~~~~~~ | |
| 543 | |
| 544 Although GATK4 Mutect2 accommodates varying coverage depths, further | |
| 545 optimization of parameters may improve calling for extreme high depths, | |
| 546 e.g. 1000X. | |
| 547 ]]></help> | |
| 548 <citations> | |
| 549 <expand macro="citations"/> | |
| 550 </citations> | |
| 551 </tool> |
