Mercurial > repos > dave > gatk4_mutect2
changeset 0:c27f4eb641bf draft
Uploaded
author | dave |
---|---|
date | Thu, 26 Sep 2019 09:23:23 -0400 |
parents | |
children | fcf1bc63ed06 |
files | gatk4_Mutect2.xml macros.xml |
diffstat | 2 files changed, 1261 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gatk4_Mutect2.xml Thu Sep 26 09:23:23 2019 -0400 @@ -0,0 +1,551 @@
+<tool id="gatk4_mutect2" name="GATK4 Mutect2" version="@WRAPPER_VERSION@0" profile="18.05">
+    <description>- Call somatic SNVs and indels via local assembly of haplotypes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_cmd"/>
+    <!-- Builds the shell command line. Order matters: reference staging first,
+         then staging of the optional VCF resources, then a single Mutect2
+         invocation whose arguments are appended section by section. -->
+    <command detect_errors="exit_code">
+        <![CDATA[
+        #include source=$set_sections#
+        #include source=$pre_gatk_excl_ints_chth#
+        #include source=$bam_index_pre_chth#
+        #include source=$pre_gatk_ints_chth#
+
+        ## REFERENCE STAGING ##
+        ## Link the reference under a fixed name, then build its .fai index and .dict.
+        ln -s '$reference_source.reference_sequence' reference.fa &&
+        samtools faidx reference.fa &&
+        gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&
+
+        ## OPTIONAL RESOURCE STAGING ##
+        ## Each statement below is a complete shell command ending in "&&", so it
+        ## has to be emitted BEFORE the Mutect2 invocation begins. Emitting it from
+        ## inside the argument list would hand "ln", "-s", ... to Mutect2 as arguments.
+        ## bgzipped VCFs get a .vcf.gz link plus a tabix index; anything else is
+        ## linked as plain .vcf. The file names match the ones used in the argument
+        ## section further down.
+        ## "datatype" is not read anywhere in this template; the assignments are kept
+        ## in case an included macro template uses it - TODO confirm against macros.xml.
+        #if str($optional.optional_parameters) == 'yes'
+
+            #if $optional.population_callset
+                #set datatype = $optional.population_callset.datatype
+                #if $optional.population_callset.is_of_type("vcf_bgzip")
+                    ln -s '$optional.population_callset' population_callset.vcf.gz &&
+                    tabix population_callset.vcf.gz &&
+                #else
+                    ln -s '$optional.population_callset' population_callset.vcf &&
+                #end if
+            #end if
+
+            #if $optional.panel_of_normals
+                #set datatype = $optional.panel_of_normals.datatype
+                #if $optional.panel_of_normals.is_of_type("vcf_bgzip")
+                    ln -s '$optional.panel_of_normals' panel_of_normals.vcf.gz &&
+                    tabix panel_of_normals.vcf.gz &&
+                #else
+                    ln -s '$optional.panel_of_normals' panel_of_normals.vcf &&
+                #end if
+            #end if
+
+            #if $optional.germline_resource
+                #set datatype = $optional.germline_resource.datatype
+                #if $optional.germline_resource.is_of_type("vcf_bgzip")
+                    ln -s '$optional.germline_resource' germline_resource.vcf.gz &&
+                    tabix germline_resource.vcf.gz &&
+                #else
+                    ln -s '$optional.germline_resource' germline_resource.vcf &&
+                #end if
+            #end if
+
+            #if $optional.alleles
+                #set datatype = $optional.alleles.datatype
+                #if $optional.alleles.is_of_type("vcf_bgzip")
+                    ln -s '$optional.alleles' alleles.vcf.gz &&
+                    tabix alleles.vcf.gz &&
+                #else
+                    ln -s '$optional.alleles' alleles.vcf &&
+                #end if
+            #end if
+
+        #end if
+        ## END OPTIONAL RESOURCE STAGING ##
+
+        ## CMD_BEGIN is a token defined outside this file (presumably the gatk
+        ## launcher prefix from macros.xml); everything after it is one argument list.
+        @CMD_BEGIN@ Mutect2
+
+        #include source=$gatk_bam_input#
+
+        ## COMMON PARAMETERS ##
+        ## Free-text and dataset parameters have no default value, so they are only
+        ## passed when the user supplied something; otherwise an empty or "None"
+        ## value would be handed to the tool.
+
+        #if str($common.common_parameters) == 'yes'
+
+            #if $common.read_filter
+                --read-filter="$common.read_filter"
+            #end if
+            --seconds-between-progress-updates="$common.seconds_between_progress_updates"
+            --verbosity="$common.verbosity"
+            --read-validation-stringency="$common.read_validation_stringency"
+            --interval-set-rule="$common.interval_set_rule"
+            #if $common.gatk_config_file
+                --gatk-config-file="$common.gatk_config_file"
+            #end if
+            #if $common.disable_read_filter
+                --disable-read-filter="$common.disable_read_filter"
+            #end if
+            $common.QUIET
+            $common.use_jdk_deflater
+            $common.use_jdk_inflater
+            $common.lenient
+            $common.disable_tool_default_read_filters
+            $common.add_output_sam_program_record
+            $common.add_output_vcf_command_line
+
+        #end if
+
+        ## END COMMON PARAMETERS ##
+
+        ## OPTIONAL PARAMETERS ##
+
+        #if str($optional.optional_parameters) == 'yes'
+
+            ## Resource files were linked under these fixed names in the staging section.
+            #if $optional.panel_of_normals
+                #if $optional.panel_of_normals.is_of_type("vcf_bgzip")
+                    --panel-of-normals panel_of_normals.vcf.gz
+                #else
+                    --panel-of-normals panel_of_normals.vcf
+                #end if
+            #end if
+
+            #if $optional.pedigree
+                --pedigree="$optional.pedigree"
+            #end if
+
+            #if $optional.alleles
+                #if $optional.alleles.is_of_type("vcf_bgzip")
+                    --alleles alleles.vcf.gz
+                #else
+                    --alleles alleles.vcf
+                #end if
+            #end if
+
+            #if $optional.germline_resource
+                #if $optional.germline_resource.is_of_type("vcf_bgzip")
+                    --germline-resource germline_resource.vcf.gz
+                #else
+                    --germline-resource germline_resource.vcf
+                #end if
+            #end if
+
+            #if $optional.population_callset
+                #if $optional.population_callset.is_of_type("vcf_bgzip")
+                    --population-callset population_callset.vcf.gz
+                #else
+                    --population-callset population_callset.vcf
+                #end if
+            #end if
+
+            #if $optional.arguments_file
+                --arguments_file="$optional.arguments_file"
+            #end if
+
+            --base-quality-score-threshold="$optional.base_quality_score_threshold"
+            --cloud-index-prefetch-buffer="$optional.cloud_index_prefetch_buffer"
+            --cloud-prefetch-buffer="$optional.cloud_prefetch_buffer"
+            #if $optional.annotation
+                --annotation="$optional.annotation"
+            #end if
+            #if $optional.annotation_group
+                --annotation-group="$optional.annotation_group"
+            #end if
+            #if $optional.annotations_to_exclude
+                --annotations-to-exclude="$optional.annotations_to_exclude"
+            #end if
+            --af-of-alleles-not-in-resource="$optional.af_of_alleles_not_in_resource"
+            --contamination-fraction-to-filter="$optional.contamination_fraction_to_filter"
+            --downsampling-stride="$optional.downsampling_stride"
+            #if $optional.founder_id
+                --founder-id="$optional.founder_id"
+            #end if
+            --gcs-max-retries="$optional.gcs_max_retries"
+            --genotyping-mode="$optional.genotyping_mode"
+            --heterozygosity="$optional.heterozygosity"
+            --heterozygosity-stdev="$optional.heterozygosity_stdev"
+            --indel-heterozygosity="$optional.indel_heterozygosity"
+            --initial-tumor-lod="$optional.initial_tumor_lod"
+            --interval-merging-rule="$optional.interval_merging_rule"
+            --max-population-af="$optional.max_population_af"
+            --max-reads-per-alignment-start="$optional.max_reads_per_alignment_start"
+            --min-base-quality-score="$optional.min_base_quality_score"
+            --native-pair-hmm-threads="$optional.native_pair_hmm_threads"
+            --normal-lod="$optional.normal_lod"
+            #if $optional.normal_sample
+                --normal-sample="$optional.normal_sample"
+            #end if
+            --num-reference-samples-if-no-call="$optional.num_reference_samples_if_no_call"
+            --output-mode="$optional.output_mode"
+            --sample-ploidy="$optional.sample_ploidy"
+            --standard-min-confidence-threshold-for-calling="$optional.standard_min_confidence_threshold_for_calling"
+            --tumor-lod-to-emit="$optional.tumor_lod_to_emit"
+            $optional.annotate_with_num_discovered_alleles
+            $optional.disable_bam_index_caching
+            $optional.disable_sequence_dictionary_validation
+            $optional.genotype_germline_sites
+            $optional.genotype_pon_sites
+            $optional.native_pair_hmm_use_double_precision
+            $optional.sites_only_vcf_output
+            $optional.use_new_qual_calculator
+        #end if
+
+        ## END OPTIONAL PARAMETERS ##
+
+        ## ADVANCED PARAMETERS ##
+
+        #if str($advanced.advanced_parameters) == 'yes'
+            --active-probability-threshold="$advanced.active_probability_threshold"
+            --assembly-region-padding="$advanced.assembly_region_padding"
+            --bam-writer-type="$advanced.bam_writer_type"
+            #if $advanced.contamination_fraction_per_sample_file
+                --contamination-fraction-per-sample-file="$advanced.contamination_fraction_per_sample_file"
+            #end if
+            #if $advanced.input_prior
+                --input-prior="$advanced.input_prior"
+            #end if
+            ## kmer_size is an integer field declared with an empty default value.
+            #if str($advanced.kmer_size) != ''
+                --kmer-size="$advanced.kmer_size"
+            #end if
+            --max-alternate-alleles="$advanced.max_alternate_alleles"
+            --max-assembly-region-size="$advanced.max_assembly_region_size"
+            --max-genotype-count="$advanced.max_genotype_count"
+            --max-mnp-distance="$advanced.max_mnp_distance"
+            --max-num-haplotypes-in-population="$advanced.max_num_haplotypes_in_population"
+            --max-prob-propagation-distance="$advanced.max_prob_propagation_distance"
+            --max-suspicious-reads-per-alignment-start="$advanced.max_suspicious_reads_per_alignment_start"
+            --min-assembly-region-size="$advanced.min_assembly_region_size"
+            --min-dangling-branch-length="$advanced.min_dangling_branch_length"
+            --min-pruning="$advanced.min_pruning"
+            --num-pruning-samples="$advanced.num_pruning_samples"
+            --pair-hmm-gap-continuation-penalty="$advanced.pair_hmm_gap_continuation_penalty"
+            --pair-hmm-implementation="$advanced.pair_hmm_implementation"
+            --pcr-indel-model="$advanced.pcr_indel_model"
+            --phred-scaled-global-read-mismapping-rate="$advanced.phred_scaled_global_read_mismapping_rate"
+            --smith-waterman="$advanced.smith_waterman"
+            $advanced.all_site_pls
+            $advanced.allow_non_unique_kmers_in_ref
+            $advanced.consensus
+            $advanced.debug
+            $advanced.disable_tool_default_annotations
+            $advanced.do_not_run_physical_phasing
+            $advanced.dont_increase_kmer_sizes_for_cycles
+            $advanced.dont_trim_active_regions
+            $advanced.dont_use_soft_clipped_bases
+            $advanced.enable_all_annotations
+            $advanced.genotype_filtered_alleles
+            $advanced.use_filtered_reads_for_annotations
+
+        #end if
+
+        ## END ADVANCED PARAMETERS ##
+
+        ## ADDITIONAL OUTPUT PARAMETERS ##
+        ## NOTE(review): this section is switched by the advanced conditional, not by
+        ## the "output" conditional that holds the *_sel checkboxes, and the checkboxes
+        ## are never consulted here. Left unchanged on purpose: the right gate depends
+        ## on how the output filters and macros.xml name things - confirm before changing.
+
+        #if str($advanced.advanced_parameters) == 'yes'
+
+            --activity-profile-out="$activity_profile_out"
+            --assembly-region-out="$assembly_region_out"
+            --bam-output="$bam_output"
+            --graph-output="$graph_output"
+
+        #end if
+
+        #include source=$gatk_excl_ints_chth#
+        #include source=$gatk_ints_chth#
+        #include source=$vcf_output_opts#
+        #include source=$ref_opts#
+        #include source=$gatk_seqdict#
+        #if $tumor_sample
+            --tumor-sample="$tumor_sample"
+        #end if
+        ]]>
+    </command>
+    <inputs>
+        <expand macro="gatk_bam_req_params"/>
+        <expand macro="gzip_vcf_params"/>
+        <expand macro="ref_sel"/>
+        <param name="tumor_sample" argument="--tumor-sample" type="text" optional="false" value="" label="Tumor Sample" help="BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument."/>
+        <conditional name="common">
+            <param name="common_parameters" type="select" label="Common parameters">
+                <option value="no">Use internal defaults</option>
+                <option value="yes">Specify parameters</option>
+            </param>
+            <when value="yes">
+                <expand macro="gatk_excl_ints"/>
+                <expand macro="seq_dict_sel"/>
+                <param name="add_output_sam_program_record" argument="--add-output-sam-program-record" type="boolean" truevalue="--add-output-sam-program-record" falsevalue="" optional="true" checked="true" label="Add Output Sam Program Record" help="If true, adds a PG tag to created SAM/BAM/CRAM files."/>
+                <param name="add_output_vcf_command_line" argument="--add-output-vcf-command-line" type="boolean" truevalue="--add-output-vcf-command-line" falsevalue="" optional="true" checked="true" label="Add Output Vcf Command Line" help="If true, adds a command line header line to created VCF files."/>
+                <param name="disable_read_filter" argument="--disable-read-filter" type="text" optional="true" value="" label="Disable Read Filter" help="Read filters to be disabled before analysis"/>
+                <param name="disable_tool_default_read_filters" argument="--disable-tool-default-read-filters" 
type="boolean" truevalue="--disable-tool-default-read-filters" falsevalue="" optional="true" checked="false" label="Disable Tool Default Read Filters" help="Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on)"/> + <param name="gatk_config_file" argument="--gatk-config-file" type="data" optional="true" format="" label="Gatk Config File" help="A configuration file to use with the GATK."/> + <param name="interval_set_rule" argument="--interval-set-rule" type="select" optional="true" label="Interval Set Rule" help="Set merging approach to use for combining interval inputs"> + <option selected="true" value="UNION">UNION</option> + <option selected="false" value="INTERSECTION">INTERSECTION</option> + </param> + <param name="lenient" argument="--lenient" type="boolean" truevalue="--lenient" falsevalue="" optional="true" checked="false" label="Lenient" help="Lenient processing of VCF files"/> + <param name="QUIET" argument="--QUIET" type="boolean" truevalue="--QUIET" falsevalue="" optional="true" checked="false" label="Quiet" help="Whether to suppress job-summary info on System.err."/> + <param name="read_filter" argument="--read-filter" type="text" optional="true" value="" label="Read Filter" help="Read filters to be applied before analysis"/> + <param name="read_validation_stringency" argument="--read-validation-stringency" type="select" optional="true" label="Read Validation Stringency" help="Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. 
The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded."> + <option selected="false" value="STRICT">STRICT</option> + <option selected="false" value="LENIENT">LENIENT</option> + <option selected="true" value="SILENT">SILENT</option> + </param> + <param name="seconds_between_progress_updates" argument="--seconds-between-progress-updates" type="float" optional="true" value="10.0" label="Seconds Between Progress Updates" help="Output traversal statistics every time this many seconds elapse"/> + <param name="use_jdk_deflater" argument="--use-jdk-deflater" type="boolean" truevalue="--use-jdk-deflater" falsevalue="" optional="true" checked="false" label="Use Jdk Deflater" help="Whether to use the JdkDeflater (as opposed to IntelDeflater)"/> + <param name="use_jdk_inflater" argument="--use-jdk-inflater" type="boolean" truevalue="--use-jdk-inflater" falsevalue="" optional="true" checked="false" label="Use Jdk Inflater" help="Whether to use the JdkInflater (as opposed to IntelInflater)"/> + <param name="verbosity" argument="--verbosity" type="select" optional="true" label="Verbosity" help="Control verbosity of logging."> + <option selected="false" value="ERROR">ERROR</option> + <option selected="false" value="WARNING">WARNING</option> + <option selected="true" value="INFO">INFO</option> + <option selected="false" value="DEBUG">DEBUG</option> + </param> + </when> + <when value="no" /> + </conditional> + <conditional name="optional"> + <param name="optional_parameters" type="select" label="Optional parameters"> + <option value="no">Use internal defaults</option> + <option value="yes">Specify parameters</option> + </param> + <when value="yes"> + <expand macro="gatk_ints"/> + <param name="af_of_alleles_not_in_resource" argument="--af-of-alleles-not-in-resource" type="float" optional="true" value="-1.0" label="Af Of Alleles Not In Resource" help="Population 
allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for a derivation of the default value."/> + <param name="alleles" argument="--alleles" type="data" optional="true" format="" label="Alleles" help="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/> + <param name="annotate_with_num_discovered_alleles" argument="--annotate-with-num-discovered-alleles" type="boolean" truevalue="--annotate-with-num-discovered-alleles" falsevalue="" optional="true" checked="false" label="Annotate With Num Discovered Alleles" help="If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site"/> + <param name="annotation" argument="--annotation" type="text" optional="true" value="" label="Annotation" help="One or more specific annotations to add to variant calls"/> + <param name="annotation_group" argument="--annotation-group" type="text" optional="true" value="" label="Annotation Group" help="One or more groups of annotations to apply to variant calls"/> + <param name="annotations_to_exclude" argument="--annotations-to-exclude" type="text" optional="true" value="" label="Annotations To Exclude" help="One or more specific annotations to exclude from variant calls"/> + <param name="arguments_file" argument="--arguments_file" type="data" optional="true" format="" label="Arguments_File" help="read one or more arguments files and add them to the command line"/> + <param name="base_quality_score_threshold" argument="--base-quality-score-threshold" type="integer" optional="true" value="18" label="Base Quality Score Threshold" help="Base qualities below this threshold will be reduced to the minimum (6)"/> + <param name="cloud_index_prefetch_buffer" argument="--cloud-index-prefetch-buffer" type="integer" optional="true" value="-1" label="Cloud Index Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to 
disable). Defaults to cloudPrefetchBuffer if unset."/> + <param name="cloud_prefetch_buffer" argument="--cloud-prefetch-buffer" type="integer" optional="true" value="40" label="Cloud Prefetch Buffer" help="Size of the cloud-only prefetch buffer (in MB; 0 to disable)."/> + <param name="contamination_fraction_to_filter" argument="--contamination-fraction-to-filter" type="float" optional="true" value="0.0" label="Contamination Fraction To Filter" help="Fraction of contamination in sequencing data (for all samples) to aggressively remove"/> + <param name="disable_bam_index_caching" argument="--disable-bam-index-caching" type="boolean" truevalue="--disable-bam-index-caching" falsevalue="" optional="true" checked="false" label="Disable Bam Index Caching" help="If true, don&apos;t cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified."/> + <param name="disable_sequence_dictionary_validation" argument="--disable-sequence-dictionary-validation" type="boolean" truevalue="--disable-sequence-dictionary-validation" falsevalue="" optional="true" checked="false" label="Disable Sequence Dictionary Validation" help="If specified, do not check the sequence dictionaries from our inputs for compatibility. 
Use at your own risk!"/> + <param name="downsampling_stride" argument="--downsampling-stride" type="integer" optional="true" value="1" label="Downsampling Stride" help="Downsample a pool of reads starting within a range of one or more bases."/> + <param name="founder_id" argument="--founder-id" type="text" optional="true" value="" label="Founder Id" help="Samples representing the population &quot;founders&quot;"/> + <param name="gcs_max_retries" argument="--gcs-max-retries" type="integer" optional="true" value="20" label="Gcs Max Retries" help="If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection"/> + <param name="genotype_germline_sites" argument="--genotype-germline-sites" type="boolean" truevalue="--genotype-germline-sites" falsevalue="" optional="true" checked="false" label="Genotype Germline Sites" help="(EXPERIMENTAL) Call all apparent germline site even though they will ultimately be filtered."/> + <param name="genotype_pon_sites" argument="--genotype-pon-sites" type="boolean" truevalue="--genotype-pon-sites" falsevalue="" optional="true" checked="false" label="Genotype Pon Sites" help="Call sites in the PoN even though they will ultimately be filtered."/> + <param name="genotyping_mode" argument="--genotyping-mode" type="select" optional="true" label="Genotyping Mode" help="Specifies how to determine the alternate alleles to use for genotyping"> + <option selected="true" value="DISCOVERY">DISCOVERY</option> + <option selected="false" value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option> + </param> + <param name="germline_resource" argument="--germline-resource" type="data" optional="true" format="" label="Germline Resource" help="Population vcf of germline sequencing containing allele fractions."/> + <param name="heterozygosity" argument="--heterozygosity" type="float" optional="true" value="0.001" label="Heterozygosity" help="Heterozygosity value used to compute prior likelihoods for any locus. 
See the GATKDocs for full details on the meaning of this population genetics concept"/> + <param name="heterozygosity_stdev" argument="--heterozygosity-stdev" type="float" optional="true" value="0.01" label="Heterozygosity Stdev" help="Standard deviation of heterozygosity for SNP and indel calling."/> + <param name="indel_heterozygosity" argument="--indel-heterozygosity" type="float" optional="true" value="0.000125" label="Indel Heterozygosity" help="Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept"/> + <param name="initial_tumor_lod" argument="--initial-tumor-lod" type="float" optional="true" value="2.0" label="Initial Tumor Lod" help="LOD threshold to consider pileup active."/> + <param name="interval_merging_rule" argument="--interval-merging-rule" type="select" optional="true" label="Interval Merging Rule" help="Interval merging rule for abutting intervals"> + <option selected="true" value="ALL">ALL</option> + <option selected="false" value="OVERLAPPING_ONLY">OVERLAPPING_ONLY</option> + </param> + <param name="max_population_af" argument="--max-population-af" type="float" optional="true" value="0.01" label="Max Population Af" help="Maximum population allele frequency in tumor-only mode."/> + <param name="max_reads_per_alignment_start" argument="--max-reads-per-alignment-start" type="integer" optional="true" value="50" label="Max Reads Per Alignment Start" help="Maximum number of reads to retain per alignment start position. Reads above this threshold will be downsampled. 
Set to 0 to disable."/> + <param name="min_base_quality_score" argument="--min-base-quality-score" type="integer" optional="true" value="10" label="Min Base Quality Score" help="Minimum base quality required to consider a base for calling"/> + <param name="native_pair_hmm_threads" argument="--native-pair-hmm-threads" type="integer" optional="true" value="4" label="Native Pair Hmm Threads" help="How many threads should a native pairHMM implementation use"/> + <param name="native_pair_hmm_use_double_precision" argument="--native-pair-hmm-use-double-precision" type="boolean" truevalue="--native-pair-hmm-use-double-precision" falsevalue="" optional="true" checked="false" label="Native Pair Hmm Use Double Precision" help="use double precision in the native pairHmm. This is slower but matches the java implementation better"/> + <param name="normal_lod" argument="--normal-lod" type="float" optional="true" value="2.2" label="Normal Lod" help="LOD threshold for calling normal variant non-germline."/> + <param name="normal_sample" argument="--normal-sample" type="text" optional="true" value="" label="Normal Sample" help="BAM sample name of normal. 
May be URL-encoded as output by GetSampleName with -encode argument."/> + <param name="num_reference_samples_if_no_call" argument="--num-reference-samples-if-no-call" type="integer" optional="true" value="0" label="Num Reference Samples If No Call" help="Number of hom-ref genotypes to infer at sites not present in a panel"/> + <param name="output_mode" argument="--output-mode" type="select" optional="true" label="Output Mode" help="Specifies which type of calls we should output"> + <option selected="true" value="EMIT_VARIANTS_ONLY">EMIT_VARIANTS_ONLY</option> + <option selected="false" value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option> + <option selected="false" value="EMIT_ALL_SITES">EMIT_ALL_SITES</option> + </param> + <param name="panel_of_normals" argument="--panel-of-normals" type="data" optional="true" format="" label="Panel Of Normals" help="VCF file of sites observed in normal."/> + <param name="pedigree" argument="--pedigree" type="data" optional="true" format="" label="Pedigree" help="Pedigree file for determining the population &quot;founders&quot;"/> + <param name="population_callset" argument="--population-callset" type="data" optional="true" format="" label="Population Callset" help="Callset to use in calculating genotype priors"/> + <param name="sample_ploidy" argument="--sample-ploidy" type="integer" optional="true" value="2" label="Sample Ploidy" help="Ploidy (number of chromosomes) per sample. 
For pooled data, set to (Number of samples in each pool * Sample Ploidy)."/> + <param name="sites_only_vcf_output" argument="--sites-only-vcf-output" type="boolean" truevalue="--sites-only-vcf-output" falsevalue="" optional="true" checked="false" label="Sites Only Vcf Output" help="If true, don&apos;t emit genotype fields when writing vcf file output."/> + <param name="standard_min_confidence_threshold_for_calling" argument="--standard-min-confidence-threshold-for-calling" type="float" optional="true" value="10.0" label="Standard Min Confidence Threshold For Calling" help="The minimum phred-scaled confidence threshold at which variants should be called"/> + <param name="tumor_lod_to_emit" argument="--tumor-lod-to-emit" type="float" optional="true" value="3.0" label="Tumor Lod To Emit" help="LOD threshold to emit tumor variant to VCF."/> + <param name="use_new_qual_calculator" argument="--use-new-qual-calculator" type="boolean" truevalue="--use-new-qual-calculator" falsevalue="" optional="true" checked="false" label="Use New Qual Calculator" help="If provided, we will use the new AF model instead of the so-called exact model"/> + </when> + <when value="no" /> + </conditional> + <conditional name="advanced"> + <param name="advanced_parameters" type="select" label="Advanced parameters"> + <option value="no">Use internal defaults</option> + <option value="yes">Specify parameters</option> + </param> + <when value="yes"> + <param name="active_probability_threshold" argument="--active-probability-threshold" type="float" optional="true" value="0.002" label="Active Probability Threshold" help="Minimum probability for a locus to be considered active."/> + <param name="all_site_pls" argument="--all-site-pls" type="boolean" truevalue="--all-site-pls" falsevalue="" optional="true" checked="false" label="All Site Pls" help="Annotate all sites with PLs"/> + <param name="allow_non_unique_kmers_in_ref" argument="--allow-non-unique-kmers-in-ref" type="boolean" 
truevalue="--allow-non-unique-kmers-in-ref" falsevalue="" optional="true" checked="false" label="Allow Non Unique Kmers In Ref" help="Allow graphs that have non-unique kmers in the reference"/> + <param name="assembly_region_padding" argument="--assembly-region-padding" type="integer" optional="true" value="100" label="Assembly Region Padding" help="Number of additional bases of context to include around each assembly region"/> + <param name="bam_writer_type" argument="--bam-writer-type" type="select" optional="true" label="Bam Writer Type" help="Which haplotypes should be written to the BAM"> + <option selected="false" value="ALL_POSSIBLE_HAPLOTYPES">ALL_POSSIBLE_HAPLOTYPES</option> + <option selected="true" value="CALLED_HAPLOTYPES">CALLED_HAPLOTYPES</option> + </param> + <param name="consensus" argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" optional="true" checked="false" label="Consensus" help="1000G consensus mode"/> + <param name="contamination_fraction_per_sample_file" argument="--contamination-fraction-per-sample-file" type="data" optional="true" format="" label="Contamination Fraction Per Sample File" help="Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. 
Format should be &quot;&lt;SampleID&gt;&lt;TAB&gt;&lt;Contamination&gt;&quot; (Contamination is double) per line; No header."/> + <param name="debug" argument="--debug" type="boolean" truevalue="--debug" falsevalue="" optional="true" checked="false" label="Debug" help="Print out very verbose debug information about each triggering active region"/> + <param name="disable_tool_default_annotations" argument="--disable-tool-default-annotations" type="boolean" truevalue="--disable-tool-default-annotations" falsevalue="" optional="true" checked="false" label="Disable Tool Default Annotations" help="Disable all tool default annotations"/> + <param name="do_not_run_physical_phasing" argument="--do-not-run-physical-phasing" type="boolean" truevalue="--do-not-run-physical-phasing" falsevalue="" optional="true" checked="false" label="Do Not Run Physical Phasing" help="Disable physical phasing"/> + <param name="dont_increase_kmer_sizes_for_cycles" argument="--dont-increase-kmer-sizes-for-cycles" type="boolean" truevalue="--dont-increase-kmer-sizes-for-cycles" falsevalue="" optional="true" checked="false" label="Dont Increase Kmer Sizes For Cycles" help="Disable iterating over kmer sizes when graph cycles are detected"/> + <param name="dont_trim_active_regions" argument="--dont-trim-active-regions" type="boolean" truevalue="--dont-trim-active-regions" falsevalue="" optional="true" checked="false" label="Dont Trim Active Regions" help="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping"/> + <param name="dont_use_soft_clipped_bases" argument="--dont-use-soft-clipped-bases" type="boolean" truevalue="--dont-use-soft-clipped-bases" falsevalue="" optional="true" checked="false" label="Dont Use Soft Clipped Bases" help="Do not analyze soft clipped bases in the reads"/> + <param name="enable_all_annotations" argument="--enable-all-annotations" type="boolean" truevalue="--enable-all-annotations" 
falsevalue="" optional="true" checked="false" label="Enable All Annotations" help="Use all possible annotations (not for the faint of heart)"/> + <param name="genotype_filtered_alleles" argument="--genotype-filtered-alleles" type="boolean" truevalue="--genotype-filtered-alleles" falsevalue="" optional="true" checked="false" label="Genotype Filtered Alleles" help="Whether to genotype all given alleles, even filtered ones, --genotyping_mode is GENOTYPE_GIVEN_ALLELES"/> + <param name="input_prior" argument="--input-prior" type="text" optional="true" value="" label="Input Prior" help="Input prior for calls"/> + <param name="kmer_size" argument="--kmer-size" type="integer" optional="true" value="" label="Kmer Size" help="Kmer size to use in the read threading assembler"/> + <param name="max_alternate_alleles" argument="--max-alternate-alleles" type="integer" optional="true" value="6" label="Max Alternate Alleles" help="Maximum number of alternate alleles to genotype"/> + <param name="max_assembly_region_size" argument="--max-assembly-region-size" type="integer" optional="true" value="300" label="Max Assembly Region Size" help="Maximum size of an assembly region"/> + <param name="max_genotype_count" argument="--max-genotype-count" type="integer" optional="true" value="1024" label="Max Genotype Count" help="Maximum number of genotypes to consider at any site"/> + <param name="max_mnp_distance" argument="--max-mnp-distance" type="integer" optional="true" value="1" label="Max Mnp Distance" help="Two or more phased substitutions separated by this distance or less are merged into MNPs."/> + <param name="max_num_haplotypes_in_population" argument="--max-num-haplotypes-in-population" type="integer" optional="true" value="128" label="Max Num Haplotypes In Population" help="Maximum number of haplotypes to consider for your population"/> + <param name="max_prob_propagation_distance" argument="--max-prob-propagation-distance" type="integer" optional="true" value="50" label="Max 
Prob Propagation Distance" help="Upper limit on how many bases away probability mass can be moved around when calculating the boundaries between active and inactive assembly regions"/> + <param name="max_suspicious_reads_per_alignment_start" argument="--max-suspicious-reads-per-alignment-start" type="integer" optional="true" value="0" label="Max Suspicious Reads Per Alignment Start" help="Maximum number of suspicious reads (mediocre mapping quality or too many substitutions) allowed in a downsampling stride. Set to 0 to disable."/> + <param name="min_assembly_region_size" argument="--min-assembly-region-size" type="integer" optional="true" value="50" label="Min Assembly Region Size" help="Minimum size of an assembly region"/> + <param name="min_dangling_branch_length" argument="--min-dangling-branch-length" type="integer" optional="true" value="4" label="Min Dangling Branch Length" help="Minimum length of a dangling branch to attempt recovery"/> + <param name="min_pruning" argument="--min-pruning" type="integer" optional="true" value="2" label="Min Pruning" help="Minimum support to not prune paths in the graph"/> + <param name="num_pruning_samples" argument="--num-pruning-samples" type="integer" optional="true" value="1" label="Num Pruning Samples" help="Number of samples that must pass the minPruning threshold"/> + <param name="pair_hmm_gap_continuation_penalty" argument="--pair-hmm-gap-continuation-penalty" type="integer" optional="true" value="10" label="Pair Hmm Gap Continuation Penalty" help="Flat gap continuation penalty for use in the Pair HMM"/> + <param name="pair_hmm_implementation" argument="--pair-hmm-implementation" type="select" optional="true" label="Pair Hmm Implementation" help="The PairHMM implementation to use for genotype likelihood calculations"> + <option selected="false" value="EXACT">EXACT</option> + <option selected="false" value="ORIGINAL">ORIGINAL</option> + <option selected="false" value="LOGLESS_CACHING">LOGLESS_CACHING</option> + 
<option selected="false" value="AVX_LOGLESS_CACHING">AVX_LOGLESS_CACHING</option> + <option selected="false" value="AVX_LOGLESS_CACHING_OMP">AVX_LOGLESS_CACHING_OMP</option> + <option selected="false" value="EXPERIMENTAL_FPGA_LOGLESS_CACHING">EXPERIMENTAL_FPGA_LOGLESS_CACHING</option> + <option selected="true" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option> + </param> + <param name="pcr_indel_model" argument="--pcr-indel-model" type="select" optional="true" label="Pcr Indel Model" help="The PCR indel model to use"> + <option selected="false" value="NONE">NONE</option> + <option selected="false" value="HOSTILE">HOSTILE</option> + <option selected="false" value="AGGRESSIVE">AGGRESSIVE</option> + <option selected="true" value="CONSERVATIVE">CONSERVATIVE</option> + </param> + <param name="phred_scaled_global_read_mismapping_rate" argument="--phred-scaled-global-read-mismapping-rate" type="integer" optional="true" value="45" label="Phred Scaled Global Read Mismapping Rate" help="The global assumed mismapping rate for reads"/> + <param name="smith_waterman" argument="--smith-waterman" type="select" optional="true" label="Smith Waterman" help="Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is the right choice"> + <option selected="false" value="FASTEST_AVAILABLE">FASTEST_AVAILABLE</option> + <option selected="false" value="AVX_ENABLED">AVX_ENABLED</option> + <option selected="true" value="JAVA">JAVA</option> + </param> + <param name="use_filtered_reads_for_annotations" argument="--use-filtered-reads-for-annotations" type="boolean" truevalue="--use-filtered-reads-for-annotations" falsevalue="" optional="true" checked="false" label="Use Filtered Reads For Annotations" help="Use the contamination-filtered read maps for the purposes of annotating variants"/> + </when> + <when value="no" /> + </conditional> + <conditional name="output"> + <param name="output_parameters" type="select" label="Output parameters"> + <option value="no">Use internal 
defaults</option> + <option value="yes">Specify parameters</option> + </param> + <when value="yes"> + <param name="activity_profile_out_sel" argument="--activity_profile_out_sel" type="boolean" truevalue="--activity_profile_out_sel" falsevalue="" optional="true" checked="false" label="Activity Profile Out" help="Output the raw activity profile results in IGV format"/> + <param name="assembly_region_out_sel" argument="--assembly_region_out_sel" type="boolean" truevalue="--assembly_region_out_sel" falsevalue="" optional="true" checked="false" label="Assembly Region Out" help="Output the assembly region to this IGV formatted file"/> + <param name="bam_output_sel" argument="--bam_output_sel" type="boolean" truevalue="--bam_output_sel" falsevalue="" optional="true" checked="false" label="Bam Output" help="File to which assembled haplotypes should be written"/> + <param name="graph_output_sel" argument="--graph_output_sel" type="boolean" truevalue="--graph_output_sel" falsevalue="" optional="true" checked="false" label="Graph Output" help="Write debug assembly graph information to this file"/> + </when> + <when value="no" /> + </conditional> + </inputs> + <outputs> + <expand macro="gzip_vcf_output_params"/> + <!--Optional outputs: each is created only when the "output" conditional is set to "yes" and its checkbox is ticked. The section check comes first because the _sel keys are absent from the "no" branch.--> + <data format="tabular" name="activity_profile_out" label="${tool.name} on ${on_string}: activity_profile_out tabular"> + <filter>output['output_parameters'] == 'yes' and output['activity_profile_out_sel']</filter> + </data> + <data format="tabular" name="assembly_region_out" label="${tool.name} on ${on_string}: assembly_region_out tabular"> + <filter>output['output_parameters'] == 'yes' and output['assembly_region_out_sel']</filter> + </data> + <data format="bam" name="bam_output" label="${tool.name} on ${on_string}: bam_output bam"> + <filter>output['output_parameters'] == 'yes' and output['bam_output_sel']</filter> + </data> + <data format="txt" name="graph_output" label="${tool.name} on ${on_string}: graph_output txt"> + <filter>output['output_parameters'] == 'yes' and output['graph_output_sel']</filter> + </data> + </outputs> + <tests> + <test /> + </tests> + <help><![CDATA[Call somatic short variants via 
local assembly of haplotypes. Short +variants include single nucleotide (SNV) and insertion and deletion +(indel) variants. The caller combines the DREAM challenge-winning +somatic genotyping engine of the original MuTect (`Cibulskis et al., +2013 <http://www.nature.com/nbt/journal/v31/n3/full/nbt.2514.html>`__) +with the assembly-based machinery of +`HaplotypeCaller <https://www.broadinstitute.org/gatk/documentation/tooldocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php>`__. + +This tool is featured in the *Somatic Short Mutation calling Best +Practice Workflow*. See +`Tutorial#11136 <https://software.broadinstitute.org/gatk/documentation/article?id=11136>`__ +for a step-by-step description of the workflow and +`Article#11127 <https://software.broadinstitute.org/gatk/documentation/article?id=11127>`__ +for an overview of what traditional somatic calling entails. For the +latest pipeline scripts, see the `Mutect2 WDL scripts +directory <https://github.com/broadinstitute/gatk/tree/master/scripts/mutect2_wdl>`__. +Although we present the tool for somatic calling, it may apply to other +contexts, such as mitochondrial variant calling. + +Usage examples +~~~~~~~~~~~~~~ + +Example commands show how to run Mutect2 for typical scenarios. The two +modes are (i) *somatic mode* where a tumor sample is matched with a +normal sample in analysis and (ii) *tumor-only mode* where a single +sample's alignment data undergoes analysis. + +(i) Tumor with matched normal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Given a matched normal, Mutect2 is designed to call somatic variants +only. The tool includes logic to skip emitting variants that are clearly +present in the germline based on provided evidence, e.g. in the matched +normal. This is done at an early stage to avoid spending computational +resources on germline events. If the variant's germline status is +borderline, then Mutect2 will emit the variant to the callset for +subsequent filtering and review. 
+ +:: + + gatk Mutect2 \ + -R reference.fa \ + -I tumor.bam \ + -tumor tumor_sample_name \ + -I normal.bam \ + -normal normal_sample_name \ + --germline-resource af-only-gnomad.vcf.gz \ + --af-of-alleles-not-in-resource 0.00003125 \ + --panel-of-normals pon.vcf.gz \ + -O somatic.vcf.gz + + +The --af-of-alleles-not-in-resource argument value should match +expectations for alleles not found in the provided germline resource. +Note the tool does not require a germline resource nor a panel of +normals (PoN) to run. The tool prefilters sites for the matched normal +and the PoN. For the germline resource, the tool prefilters on the +allele. Below is an excerpt of a known variants resource with population +allele frequencies + +:: + + #CHROM POS ID REF ALT QUAL FILTER INFO + 1 10067 . T TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC 30.35 PASS AC=3;AF=7.384E-5 + 1 10108 . CAACCCT C 46514.32 PASS AC=6;AF=1.525E-4 + 1 10109 . AACCCTAACCCT AAACCCT,* 89837.27 PASS AC=48,5;AF=0.001223,1.273E-4 + 1 10114 . TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA *,CAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTA,T 36728.97 PASS AC=55,9,1;AF=0.001373,2.246E-4,2.496E-5 + 1 10119 . CT C,* 251.23 PASS AC=5,1;AF=1.249E-4,2.498E-5 + 1 10120 . TA CA,* 14928.74 PASS AC=10,6;AF=2.5E-4,1.5E-4 + 1 10128 . ACCCTAACCCTAACCCTAAC A,* 285.71 PASS AC=3,1;AF=7.58E-5,2.527E-5 + 1 10131 . CT C,* 378.93 PASS AC=7,5;AF=1.765E-4,1.261E-4 + 1 10132 . TAACCC *,T 18025.11 PASS AC=12,2;AF=3.03E-4,5.049E-5 + + +(ii) Tumor-only mode +^^^^^^^^^^^^^^^^^^^^ + +This mode runs on a single sample, e.g. single tumor or single normal +sample. To create a PoN, call on each normal sample in this mode, then +use CreateSomaticPanelOfNormals to generate the PoN. 
+ +:: + + gatk Mutect2 \ + -R reference.fa \ + -I sample.bam \ + -tumor sample_name \ + -O single_sample.vcf.gz + + +Further points of interest +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Additional parameters that factor towards filtering, including +normal-artifact-lod (default threshold 0.0) and tumor-lod (default +threshold 5.3), are available in FilterMutectCalls. While the tool +calculates normal-lod assuming a diploid genotype, it calculates +normal-artifact-lod with the same approach it uses for tumor-lod, i.e. +with a variable ploidy assumption. + +- If the normal artifact log odds becomes large, then FilterMutectCalls applies the artifact-in-normal filter. For matched normal samples with tumor contamination, consider increasing the normal-artifact-lod threshold. + +- The tumor log odds, which is calculated independently of any matched normal, determines whether to filter a tumor variant. Variants with tumor LODs exceeding the threshold pass filtering. + + +If a variant is absent from a given germline resource, then the value +for --af-of-alleles-not-in-resource applies. For example, gnomAD's +16,000 samples (~32,000 homologs per locus) becomes a probability of one +in 32,000 or less. Thus, an allele's absence from the germline resource +becomes evidence that it is not a germline variant. + +Caveats +~~~~~~~ + +Although GATK4 Mutect2 accommodates varying coverage depths, further +optimization of parameters may improve calling for extremely high depths, +e.g. 1000X. +]]></help> + <citations> + <expand macro="citations"/> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Sep 26 09:23:23 2019 -0400 @@ -0,0 +1,710 @@ +<?xml version="1.0"?> +<macros> + <token name="@VERSION@">4.0.5.1</token> + <token name="@WRAPPER_VERSION@">@VERSION@+galaxy</token> + + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">gatk4</requirement> + <requirement type="package" version="0.2.5">tabix</requirement> + <requirement type="package" version="1.9">samtools</requirement> + <yield /> + </requirements> + </xml> + + <!--Hacky way to determine GATK version, for display in tool info--> + <xml name="version_cmd"> + <version_command>gatk SortSam --version 2>&1 | grep Version | cut -d ':' -f 2</version_command> + </xml> + + <!--Command token, java options should not be hard coded here.--> + <token name="@CMD_BEGIN@">gatk --java-options "\$_JAVA_OPTIONS"</token> + + <!--Define sections that parameters could exist within.--> + <template name="set_sections"> + #set global $sections = ['', 'optional.', 'advanced.', 'common.', 'deprecated.'] + </template> + + <!--Reference genome handling--> + <!--One template each for the different reference genome parameter names.--> + <!--TODO: Can the reference parameters all be the same?--> + <xml name="ref_sel"> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + <option value="no_ref" selected="true">Do not pass</option> + </param> + <when value="cached"> + <param name="reference_sequence" type="select" label="Reference" help="Reference sequence file." 
> + <options from_data_table="all_fasta" > + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" /> + </options> + </param> + </when> + <when value="history"> + <param name="reference_sequence" type="data" format="fasta" label="Reference" help="Reference sequence file." /> + </when> + <when value="no_ref" /> + </conditional> + </xml> + + <template name="ref_opts"> + #for $sect in $sections + #if $varExists($sect + "reference_source.reference_source_selector") + #if $getVar($sect + "reference_source.reference_source_selector") != "no_ref" + #if $getVar($sect + "reference_source.reference_source_selector") != "history" + --reference $getVar($sect + "reference_source.reference_sequence.fields.path") + #else + --reference reference.fa + #end if + #end if + #end if + #end for + </template> + + <template name="picard_ref_opts"> + #for $sect in $sections + #if $varExists($sect + "reference_source.reference_source_selector") + #if $getVar($sect + "reference_source.reference_source_selector") != "no_ref" + #if $getVar($sect + "reference_source.reference_source_selector") != "history" + --REFERENCE_SEQUENCE $getVar($sect + "reference_source.reference_sequence.fields.path") + #else + --REFERENCE_SEQUENCE $getVar($sect + "reference_source.reference_sequence") + #end if + #end if + #end if + #end for + </template> + + <template name="picard_ref_opts_plain"> + #for $sect in $sections + #if $varExists($sect + "reference_source.reference_source_selector") + #if $getVar($sect + "reference_source.reference_source_selector") != "no_ref" + #if $getVar($sect + "reference_source.reference_source_selector") != "history" + --REFERENCE $getVar($sect + "reference_source.reference_sequence.fields.path") + #else + --REFERENCE $getVar($sect + "reference_source.reference_sequence") + #end if + #end if + #end if + #end for + </template> + + <template name="ref_opts_input"> + #for $sect in $sections + #if 
$varExists($sect + "reference_source.reference_source_selector") + #if $getVar($sect + "reference_source.reference_source_selector") != "no_ref" + #if $getVar($sect + "reference_source.reference_source_selector") != "history" + --input $getVar($sect + "reference_source.reference_sequence.fields.path") + #else + --input $getVar($sect + "reference_source.reference_sequence") + #end if + #end if + #end if + #end for + </template> + + + <!--Interval Macros--> + <template name="gatk_ints_chth"> + #for $sect in $sections + #if $varExists($sect + "ival_type.ival_type_sel") + #if $getVar($sect + "ival_type.ival_type_sel") == "ival_file" + #if $varExists($sect + "ival_type.intervals") + #if $getVar($sect + "ival_type.intervals").is_of_type("gatk_interval") + --intervals intervals.interval_list + #end if + #if $getVar($sect + "ival_type.intervals").is_of_type("bed") + --intervals intervals.bed + #end if + #if $getVar($sect + "ival_type.intervals").is_of_type("vcf") + --intervals intervals.vcf + #end if + #end if + #else + #if $varExists($sect + "ival_type.intervals") + --intervals $getVar($sect + "ival_type.intervals") + #end if + #end if + #if $varExists($sect + "ival_type.interval_padding") + --interval-padding $getVar($sect + "ival_type.interval_padding") + #end if + #end if + #end for + </template> + + + <template name="pre_gatk_ints_chth"><![CDATA[ + #for $sect in $sections + #if $varExists($sect + "ival_type.ival_type_sel") + #if $getVar($sect + "ival_type.ival_type_sel") == "ival_file" + #if $varExists($sect + "ival_type.intervals") + #if $getVar($sect + "ival_type.intervals").is_of_type("gatk_interval") + ln -s $getVar($sect + "ival_type.intervals") intervals.interval_list && + #end if + #if $getVar($sect + "ival_type.intervals").is_of_type("bed") + ln -s $getVar($sect + "ival_type.intervals") intervals.bed && + #end if + #if $getVar($sect + "ival_type.intervals").is_of_type("vcf") + ln -s $getVar($sect + "ival_type.intervals") intervals.vcf && + #end if + #end if + 
#end if + #end if + #end for + ]]></template> + + <xml name="gatk_ints"> + <conditional name="ival_type"> + <param name="ival_type_sel" type="select" label="Choose Genomic Interval Source"> + <option value="ival_file" selected="true">Interval File</option> + <option value="ival_text" selected="false">Interval Text Input</option> + </param> + <when value="ival_file"> + <param name="intervals" argument="--intervals" type="data" optional="true" format="bed,vcf,gatk_interval" label="Intervals File" help="One or more genomic intervals over which to operate"/> + <param name="interval_padding" argument="--interval-padding" type="integer" optional="true" value="0" label="Interval Padding" help="Amount of padding (in bp) to add to each interval you are including."/> + </when> + <when value="ival_text"> + <param name="intervals" argument="--intervals" type="text" optional="true" label="Intervals Text Input" help="One or more genomic intervals over which to operate. Enter in chrom:start-stop format."/> + <param name="interval_padding" argument="--interval-padding" type="integer" optional="true" value="0" label="Interval Padding" help="Amount of padding (in bp) to add to each interval you are including."/> + </when> + </conditional> + </xml> + + + <!--Exclude Intervals--> + <xml name="gatk_excl_ints"> + <conditional name="excl_ival_type"> + <param name="excl_ival_type_sel" type="select" label="Choose Genomic Interval Exclusion Source"> + <option value="excl_ival_file" selected="true">Exclude Interval File</option> + <option value="excl_ival_text" selected="false">Exclude Interval Text Input</option> + </param> + <when value="excl_ival_file"> + <param name="exclude_intervals" argument="--exclude-intervals" type="data" optional="true" format="bed,vcf,gatk_interval" label="Exclude Intervals File" help="One or more genomic intervals to exclude from processing"/> + <param name="interval_exclusion_padding" argument="--interval-exclusion-padding" type="integer" optional="true" 
value="0" label="Interval Exclusion Padding" help="Amount of padding (in bp) to add to each interval you are excluding."/> + </when> + <when value="excl_ival_text"> + <param name="exclude_intervals" argument="--exclude-intervals" type="text" optional="true" label="Exclude Intervals Text Input" help="One or more genomic intervals to exclude from processing. Enter in chrom:start-stop format."/> + <param name="interval_exclusion_padding" argument="--interval-exclusion-padding" type="integer" optional="true" value="0" label="Interval Exclusion Padding" help="Amount of padding (in bp) to add to each interval you are excluding."/> + </when> + </conditional> + </xml> + + <!--Emits the exclude-interval arguments for whichever section holds the excl_ival_type conditional. The selector is compared against the option values of that conditional (excl_ival_file / excl_ival_text); file names match the links made by pre_gatk_excl_ints_chth.--> + <template name="gatk_excl_ints_chth"> + #for $sect in $sections + #if $varExists($sect + "excl_ival_type.excl_ival_type_sel") + #if $getVar($sect + "excl_ival_type.excl_ival_type_sel") == "excl_ival_file" + #if $varExists($sect + "excl_ival_type.exclude_intervals") + #if $getVar($sect + "excl_ival_type.exclude_intervals").is_of_type("gatk_interval") + --exclude-intervals excl_intervals.interval_list + #end if + #if $getVar($sect + "excl_ival_type.exclude_intervals").is_of_type("bed") + --exclude-intervals excl_intervals.bed + #end if + #if $getVar($sect + "excl_ival_type.exclude_intervals").is_of_type("vcf") + --exclude-intervals excl_intervals.vcf + #end if + #end if + #elif $getVar($sect + "excl_ival_type.excl_ival_type_sel") == "excl_ival_text" + #if $varExists($sect + "excl_ival_type.exclude_intervals") + --exclude-intervals $getVar($sect + "excl_ival_type.exclude_intervals") + #end if + #else + #pass + #end if + #if $varExists($sect + "excl_ival_type.interval_exclusion_padding") + --interval-exclusion-padding $getVar($sect + "excl_ival_type.interval_exclusion_padding") + #end if + #end if + #end for + </template> + + <!--Symlinks the selected exclude-interval file into the working directory under an extension chosen by its datatype.--> + <template name="pre_gatk_excl_ints_chth"><![CDATA[ + #for $sect in $sections + #if $varExists($sect + "excl_ival_type.excl_ival_type_sel") + #if $getVar($sect + 
"excl_ival_type.excl_ival_type_sel") == "excl_ival_file" + #if $varExists($sect + "excl_ival_type.exclude_intervals") + #if $getVar($sect + "excl_ival_type.exclude_intervals").is_of_type("gatk_interval") + ln -s $getVar($sect + "excl_ival_type.exclude_intervals") excl_intervals.interval_list && + #end if + #if $getVar($sect + "excl_ival_type.exclude_intervals").is_of_type("bed") + ln -s $getVar($sect + "excl_ival_type.exclude_intervals") excl_intervals.bed && + #end if + #if $getVar($sect + "excl_ival_type.exclude_intervals").is_of_type("vcf") + ln -s $getVar($sect + "excl_ival_type.exclude_intervals") excl_intervals.vcf && + #end if + #end if + #end if + #end if + #end for + ]]></template> + + <!--Sequence dictionary selection options for Picard type tools. For a cached reference the .dict path is the fasta path with only its final extension replaced by .dict.--> + <template name="picard_seqdict_opts"> + #for $sect in $sections + #if $varExists($sect + "seqdict_source.seqdict_source_selector") + #if $getVar($sect + "seqdict_source.seqdict_source_selector") != "no_seq_dict" + #if $getVar($sect + "seqdict_source.seqdict_source_selector") != "history" + #set $seq_dict_loc = $getVar($sect + "seqdict_source.seqdict_sequence").fields.path.rsplit('.', 1)[0] + '.dict' + --SEQUENCE_DICTIONARY $seq_dict_loc + #else + --SEQUENCE_DICTIONARY $getVar($sect + "seqdict_source.seqdict_sequence") + #end if + #end if + #end if + #end for + </template> + + <!--Same selection logic as picard_seqdict_opts, emitted with the GATK style lower-case argument name.--> + <template name="gatk_seqdict"> + #for $sect in $sections + #if $varExists($sect + "seqdict_source.seqdict_source_selector") + #if $getVar($sect + "seqdict_source.seqdict_source_selector") != "no_seq_dict" + #if $getVar($sect + "seqdict_source.seqdict_source_selector") != "history" + #set $seq_dict_loc = $getVar($sect + "seqdict_source.seqdict_sequence").fields.path.rsplit('.', 1)[0] + '.dict' + --sequence-dictionary $seq_dict_loc + #else + --sequence-dictionary $getVar($sect + "seqdict_source.seqdict_sequence") + #end if + #end if + #end if + #end for + </template> + + <xml name="seq_dict_sel"> + <conditional 
name="seqdict_source"> + <param name="seqdict_source_selector" type="select" label="Choose the source for the sequence dictionary"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + <option value="no_seq_dict" selected="true">Do not pass</option> + </param> + <when value="cached"> + <param name="seqdict_sequence" type="select" label="Sequence Dictionary" help="Sequence dictionary file." > + <options from_data_table="all_fasta" > + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" /> + </options> + </param> + </when> + <when value="history"> + <param name="seqdict_sequence" type="data" format="txt" label="Sequence Dictionary" help="Sequence dictionary file. Must be in dict format." /> + </when> + <when value="no_seq_dict" /> + </conditional> + </xml> + + <!--BAM input--> + <template name="picard_bam_input"> + --INPUT input.bam + </template> + + <template name="gatk_bam_input"> + --input input.bam + </template> + + <template name="bam_index_pre_chth"><![CDATA[ + #for $sect in $sections + #if $varExists($sect + "input") + #if $getVar($sect + "input").is_of_type("bam") + ln -s $getVar($sect + "input") input.bam && + samtools index input.bam && + #else + ln -s $getVar($sect + "input") input.sam && + samtools view -bS input.sam -o input.bam && + samtools index input.bam && + #end if + #end if + #end for + ]]></template> + + <xml name="gatk_bam_req_params"> + <param argument="--input" type="data" format="bam,sam" label="Input BAM/SAM/CRAM file" /> + </xml> + + <template name="picard_bam_index"><![CDATA[ + #if $input.is_of_type("bam") + ln -s $INPUT input.bam && + samtools index input.bam && + #else + ln -s $INPUT input.sam && + samtools view -bS input.sam -o input.bam && + samtools index input.bam && + #end if + ]]></template> + + + <!--Output goes to stdout, no output parameter exists.--> + <template name="stdout_to_output"> + > output.txt + 
</template> + + <xml name="stdout_to_output_params"> + <data format="txt" name="output" label="${tool.name} on ${on_string}: txt" from_work_dir="output.txt" /> + </xml> + + <!--Multiple input files, true for List[File] types, and sometimes List[String] types. Emits one variant argument per entry of $variant, using the inputN file names that gatk_tabix_multi links into the working directory.--> + <template name="gatk_input_multi"><![CDATA[ + #for $num, $file in enumerate($variant) + #if $file.is_of_type("vcf_bgzip") + --variant input${num}.vcf.gz + #elif $file.is_of_type("txt") + --variant input${num}.list + #else + --variant input${num}.vcf + #end if + #end for + ]]></template> + + <!--Single input file counterpart of gatk_input_multi. NOTE(review): the body references ${num}, which this template does not define; presumably a leftover from the multi-file version. Confirm that an including template supplies it before relying on this macro.--> + <template name="gatk_input_single"><![CDATA[ + #if $variant.is_of_type("vcf_bgzip") + --variant input${num}.vcf.gz + #elif $variant.is_of_type("txt") + --variant input${num}.list + #else + --variant input${num}.vcf + #end if + ]]></template> + + <!--Links every entry of $variant into the working directory as inputN with an extension chosen by datatype, and runs tabix on the bgzipped VCFs.--> + <template name="gatk_tabix_multi"><![CDATA[ + #for $num, $file in enumerate($variant) + #set datatype = $file.datatype + #if $file.is_of_type("vcf_bgzip") + ln -s $file input${num}.vcf.gz && + tabix input${num}.vcf.gz && + #elif $file.is_of_type("txt") + ln -s $file input${num}.list && + #else + ln -s $file input${num}.vcf && + #end if + #end for + ]]></template> + + <xml name="vcf_input_params_multi"> + <param name="input" type="data" multiple="true" format="vcf,vcf_bgzip" label="Input VCF file" help="Input VCF(s) to be sorted. 
Multiple inputs must have the same sample names (in order)"/> + </xml> + + + + <!--ABOVE HAS BEEN REVIEWED--> + + + + + + + + + + + <!--{--> + <!--"summary": "BAM/SAM/CRAM file containing reads",--> + <!--"name": "--input",--> + <!--"synonyms": "-I",--> + <!--"type": "List[String]",--> + <!--"required": "yes",--> + <!--"fulltext": "",--> + <!--"defaultValue": "[]",--> + <!--"minValue": "NA",--> + <!--"maxValue": "NA",--> + <!--"minRecValue": "NA",--> + <!--"maxRecValue": "NA",--> + <!--"kind": "required",--> + <!--"options": []--> + <!--},--> + <!--Required BAM input, GATK tool, may be specified multiple times.--> + <!--BAM should be indexed on the fly.--> + <!--Parameter is required, so is not contained within a section.--> + <!--Only descriptor that makes this unique for all input parameters is the summary field.--> + <!--{'pre_chth': ['bam_index_pre_chth'],--> + <!--'main_chth': ['picard_bam_input'],--> + <!--'main_xml': ['gatk_bam_req_params']}},--> + + + + + + + + + + + + + + <!--Macros for multiple input tools. 
List[File] in GATK json.--> + <template name="vcf_tabix_multi"><![CDATA[ + #for $num, $file in enumerate($input) + #set datatype = $file.datatype + #if $file.is_of_type("vcf_bgzip") + ln -s $file input${num}.vcf.gz && + tabix input${num}.vcf.gz && + #else + ln -s $file input${num}.vcf && + #end if + #end for + ]]></template> + + <template name="vcf_input_multi_picard"><![CDATA[ + #for $num, $file in enumerate($input) + #if $file.is_of_type("vcf_bgzip") + --INPUT input${num}.vcf.gz + #else + --INPUT input${num}.vcf + #end if + #end for + ]]></template> + + <template name="vcf_input_multi"><![CDATA[ + #for $num, $file in enumerate($input) + #if $file.is_of_type("vcf_bgzip") + --input input${num}.vcf.gz + #else + --input input${num}.vcf + #end if + #end for + ]]></template> + + + <!--Picard single input tools--> + <template name="vcf_tabix"><![CDATA[ + #set datatype = $input.datatype + #if $input.is_of_type("vcf_bgzip") + ln -s $input input.vcf.gz && + tabix input.vcf.gz && + #else + ln -s $input input.vcf && + #end if + ]]></template> + + <template name="gatk_tabix"><![CDATA[ + #set datatype = $variant.datatype + #if $variant.is_of_type("vcf_bgzip") + ln -s $variant input.vcf.gz && + tabix input.vcf.gz && + #else + ln -s $variant input.vcf && + gatk IndexFeatureFile -F input.vcf && + #end if + ]]></template> + + <template name="vcf_input_picard"><![CDATA[ + #if $input.is_of_type("vcf_bgzip") + --INPUT input.vcf.gz + #else + --INPUT input.vcf + #end if + ]]></template> + + <template name="vcf_input"><![CDATA[ + #if $input.is_of_type("vcf_bgzip") + --input input.vcf.gz + #else + --input input.vcf + #end if + ]]></template> + + <template name="gatk_input"><![CDATA[ + #if $variant.is_of_type("vcf_bgzip") + --variant input.vcf.gz + #else + --variant input.vcf + #end if + ]]></template> + + <template name="gatk_gvcf_tabix"><![CDATA[ + #if $variant + ln -s $variant input.g.vcf && + #end if + ]]></template> + + <template name="gatk_gvcf_input"><![CDATA[ + --variant 
input.g.vcf + ]]></template> + + <xml name="gatk_gvcf_input_params"> + <param name="variant" type="data" multiple="false" format="vcf" label="Input gVCF file" help=""/> + </xml> + + <xml name="vcf_input_params"> + <param name="input" type="data" multiple="false" format="vcf,vcf_bgzip" label="Input VCF file" help="Input VCF(s) to be sorted. Multiple inputs must have the same sample names (in order)"/> + </xml> + + <xml name="gatk_vcf_input_params"> + <param name="variant" type="data" multiple="false" format="vcf,vcf_bgzip" label="Input VCF file" help="A VCF file containing variants."/> + </xml> + + <xml name="gatk_vcf_input_params_multi"> + <param name="variant" type="data" multiple="true" format="vcf,vcf_bgzip,txt" label="Input VCF file(s)" help="A VCF file containing variants or a list of VCFs. Can be specified multiple times."/> + </xml> + + <xml name="gatk_req_params"> + <param name="input" type="data" format="bam,sam,cram" label="Input BAM/SAM/CRAM file" /> + </xml> + + <!--HDF5 Inputs--> + + <xml name="hdf5_input"> + <param name="input" type="data" format="h5,tabular" label="Input TSV or HDF5" help="Input TSV or HDF5 file containing integer read counts in genomic intervals for a single case sample (output of CollectReadCounts)." /> + </xml> + + <template name="hdf5_input_chth"><![CDATA[ + --input "${input}" + ]]></template> + + <template name="hdf5_output_chth"> + --output "${output}" + </template> + + <xml name="hdf5_output"> + <data format="h5" name="output" label="${tool.name} on ${on_string}: HDF5" help="Output file for read counts." /> + </xml> + + <!--Output specific to ModelSegments. Files created based on prefix, so force that to be what we want, then pull important files with from_work_dir.--> + <!--${SAMPLE}.cr.seg--> + <!--${SAMPLE}.modelFinal.seg--> + <template name="modelsegments_chth"><![CDATA[ + --output "." 
+        --output-prefix "modelsegments"
+    ]]></template>
+
+    <!--Datasets collected from the job working directory; the file names carry the "modelsegments" output prefix.-->
+    <xml name="modelsegments_output">
+        <data format="tabular" name="cr_seg" label="${tool.name} on ${on_string}: cr.seg" help="Copy-ratio segments." from_work_dir="modelsegments.cr.seg"/>
+        <data format="tabular" name="modelfinal_seg" label="${tool.name} on ${on_string}: modelFinal.seg" help="Modeled Segments" from_work_dir="modelsegments.modelFinal.seg"/>
+    </xml>
+
+    <!--NOTE(review): the names below look like further plot/metric files that are not exposed as outputs yet; confirm before wiring them up.-->
+    <!--deltaMAD.txt-->
+    <!--denoisedLimit4.png-->
+    <!--denoisedMAD.txt-->
+    <!--denoised.png-->
+    <!--scaledDeltaMAD.txt-->
+    <!--modeled.png-->
+    <!--standardizedMAD.txt-->
+
+    <!--Command-line fragment: write results into the working directory using the "plotmodeledsegments" prefix.-->
+    <template name="plotmodeledsegments_chth"><![CDATA[
+        --output "."
+        --output-prefix "plotmodeledsegments"
+    ]]></template>
+
+    <!--Dataset collected from the working directory; the file name carries the "plotmodeledsegments" prefix set by plotmodeledsegments_chth.-->
+    <xml name="plotmodeledsegments_output">
+        <data format="png" name="modeled_png" label="${tool.name} on ${on_string}: modeled.png" help="Copy-Ratio Plot" from_work_dir="plotmodeledsegments.modeled.png"/>
+    </xml>
+
+    <!--Common Picard options-->
+    <!--Renders the values of the picard_adv section (declared in picard_params) as command-line arguments.
+        Every optional value is emitted only when it is set, so a cleared field never produces a flag without a value.
+        Dataset paths and the free-text TMP_DIR value are single-quoted for the shell.
+        The three boolean params expand to their flag (truevalue) or to nothing (falsevalue).-->
+    <template name="picard_opts">
+        #if $picard_adv.arguments_file
+            --arguments_file '${picard_adv.arguments_file}'
+        #end if
+        #if str($picard_adv.COMPRESSION_LEVEL) != ''
+            --COMPRESSION_LEVEL ${picard_adv.COMPRESSION_LEVEL}
+        #end if
+        #if $picard_adv.GA4GH_CLIENT_SECRETS
+            --GA4GH_CLIENT_SECRETS '${picard_adv.GA4GH_CLIENT_SECRETS}'
+        #end if
+        #if str($picard_adv.MAX_RECORDS_IN_RAM) != ''
+            --MAX_RECORDS_IN_RAM ${picard_adv.MAX_RECORDS_IN_RAM}
+        #end if
+        #if str($picard_adv.TMP_DIR) != ''
+            --TMP_DIR '${picard_adv.TMP_DIR}'
+        #end if
+        #if $picard_adv.VALIDATION_STRINGENCY
+            --VALIDATION_STRINGENCY ${picard_adv.VALIDATION_STRINGENCY}
+        #end if
+        #if $picard_adv.VERBOSITY
+            --VERBOSITY ${picard_adv.VERBOSITY}
+        #end if
+        ${picard_adv.CREATE_MD5_FILE}
+        ${picard_adv.USE_JDK_DEFLATER}
+        ${picard_adv.USE_JDK_INFLATER}
+    </template>
+
+    <!--Inputs for the advanced Picard section. The section name (picard_adv) and the param names are referenced by picard_opts and picard_output_params, so keep them in sync.-->
+    <xml name="picard_params">
+        <section name="picard_adv" title="Advanced Picard Options (Only change these if you know what you're doing.)" expanded="False">
+            <param argument="--arguments_file" type="data" optional="true" format="txt" label="Arguments File" help="read one or more arguments files and add them to the command line" />
+            <param argument="--COMPRESSION_LEVEL" type="integer" optional="true" value="5" min="1" max="9" label="Compression Level" help="Compression level for all compressed files created (e.g. BAM and VCF)." />
+            <param argument="--CREATE_MD5_FILE" truevalue="--CREATE_MD5_FILE" falsevalue="" type="boolean" optional="true" checked="false" label="Create MD5 File" help="Whether to create an MD5 digest for any BAM or FASTQ files created." />
+            <param argument="--GA4GH_CLIENT_SECRETS" type="data" format="json" optional="true" label="Ga4Gh Client Secrets" help="Google Genomics API client_secrets.json file path." />
+            <param argument="--MAX_RECORDS_IN_RAM" type="integer" optional="true" value="500000" label="Max Records In Ram" help="When writing files that need to be sorted, this will specify the number of records stored in RAM before spilling to disk. Increasing this number reduces the number of file handles needed to sort the file, and increases the amount of RAM needed." />
+            <param argument="--TMP_DIR" type="text" optional="true" label="Tmp Dir" help="One or more directories with space available to be used by this program for temporary storage of working files. Keep in mind, you must be able to access this directory from either your user, or from the Galaxy user, depending on your configuration." />
+            <param argument="--USE_JDK_DEFLATER" truevalue="--USE_JDK_DEFLATER" falsevalue="" type="boolean" optional="true" checked="false" label="Use Jdk Deflater" help="Use the JDK Deflater instead of the Intel Deflater for writing compressed output" />
+            <param argument="--USE_JDK_INFLATER" truevalue="--USE_JDK_INFLATER" falsevalue="" type="boolean" optional="true" checked="false" label="Use Jdk Inflater" help="Use the JDK Inflater instead of the Intel Inflater for reading compressed input" />
+            <param argument="--VALIDATION_STRINGENCY" type="select" optional="true" label="Validation Stringency" help="Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded." >
+                <option value="STRICT" selected="true">STRICT</option>
+                <option value="LENIENT" selected="false">LENIENT</option>
+                <option value="SILENT" selected="false">SILENT</option>
+            </param>
+            <param argument="--VERBOSITY" type="select" optional="true" label="Verbosity" help="Control verbosity of logging." >
+                <option value="ERROR" selected="false">ERROR</option>
+                <option value="WARNING" selected="false">WARNING</option>
+                <option value="INFO" selected="true">INFO</option>
+                <option value="DEBUG" selected="false">DEBUG</option>
+            </param>
+        </section>
+    </xml>
+
+    <!--Provides option to create gzipped output for VCF files-->
+    <xml name="gzip_vcf_params">
+        <param name="gzipped_output" type="boolean" checked="true" label="GZIP Output?" help="If you would like gzipped output, check this box. In general, it would be preferable to do this, unless your downstream tool does not support handling of gzipped files." />
+    </xml>
+
+    <!--VCF outputs paired with gzip_vcf_params: the two filters are complementary, so exactly one dataset is created depending on gzipped_output.
+        The file names must match those written by picard_vcf_output_opts / vcf_output_opts.-->
+    <xml name="gzip_vcf_output_params">
+        <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string}: vcf" from_work_dir="output.vcf" >
+            <filter>not gzipped_output</filter>
+        </data>
+        <data format="vcf_bgzip" name="output_vcf_bgzip" label="${tool.name} on ${on_string}: vcf_bgzip" from_work_dir="output.vcf.gz" >
+            <filter>gzipped_output</filter>
+        </data>
+    </xml>
+
+
+    <!--The next two templates are the same, other than the capitalization of the output flag, so maybe there is a better way to do this.-->
+    <!--Upper-case flag spelling: picks output.vcf.gz or output.vcf based on gzipped_output.-->
+    <template name="picard_vcf_output_opts">
+        #if $gzipped_output
+            --OUTPUT output.vcf.gz
+        #else
+            --OUTPUT output.vcf
+        #end if
+    </template>
+
+    <!--Lower-case flag spelling: picks output.vcf.gz or output.vcf based on gzipped_output.-->
+    <template name="vcf_output_opts">
+        #if $gzipped_output
+            --output output.vcf.gz
+        #else
+            --output output.vcf
+        #end if
+    </template>
+
+    <!--Output related Picard options-->
+    <!--MD5 digest dataset, created only when CREATE_MD5_FILE is enabled in the picard_adv section.
+        NOTE(review): the path is fixed to output.bam.md5, so this presumably only fits tools whose main output is written as output.bam; confirm for each tool that expands this macro.-->
+    <xml name="picard_output_params">
+        <data format="txt" name="output_md5" label="${tool.name} on ${on_string}: md5sum(txt)" from_work_dir="output.bam.md5" >
+            <filter>picard_adv['CREATE_MD5_FILE']</filter>
+        </data>
+    </xml>
+
+
+    <!--Disabled reference templates, kept for later use. In the disabled lines the reference flag is written with its two leading hyphens separated by a space, because a double hyphen is not allowed inside an XML comment; rejoin them when re-enabling.-->
+    <!--<template name="ref_opts">-->
+        <!--#set $sections = ['optional','advanced','common','deprecated','']-->
+        <!--#silent $sys.stderr.write("I WOULD LIKE TO SHOW THE SECTION VARIABLE: '${sections}'\n")-->
+        <!--#for $sect in $sections-->
+            <!--#if $varExists('$sect.reference_source.reference_source_selector')-->
+                <!--#if $sect.reference_source.reference_source_selector != "no_ref"-->
+                    <!--#if $sect.reference_source.reference_source_selector != "history"-->
+                        <!-- - -reference ${sect.reference_source.reference_sequence.fields.path}-->
+                    <!--#else-->
+                        <!-- - -reference ${sect.reference_source.reference_sequence}-->
+                    <!--#end if-->
+                <!--#end if-->
+            <!--#end if-->
+        <!--#end for-->
+    <!--</template>-->
+
+
+    <!--<template name="ref_opts_opt">-->
+        <!--#if $optional.reference_source.reference_source_selector != "no_ref"-->
+            <!--#if $optional.reference_source.reference_source_selector != "history"-->
+                <!-- - -reference ${optional.reference_source.reference_sequence.fields.path}-->
+            <!--#else-->
+                <!-- - -reference ${optional.reference_source.reference_sequence}-->
+            <!--#end if-->
+        <!--#end if-->
+    <!--</template>-->
+
+
+    <!--Citations-->
+    <!--Shared DOI citations; the yield element marks where citations nested inside the expanding tool's expand element are inserted.-->
+    <xml name="citations">
+        <citation type="doi">10.1101/gr.107524.110</citation>
+        <citation type="doi">10.1038/ng.806</citation>
+        <citation type="doi">10.1002/0471250953.bi1110s43</citation>
+        <citation type="doi">10.1101/201178</citation>
+        <yield />
+    </xml>
+
+</macros>
\ No newline at end of file