view mutect2.xml @ 11:057bce4b4f93 draft default tip

Uploaded
author elixir-it
date Wed, 02 Oct 2019 04:39:04 -0400
parents e3662508ee26
children
line wrap: on
line source

 <tool id="mutect2" name="MuTect2" version="3.8">
  <!-- Short tag line shown next to the tool name in the Galaxy tool panel. -->
  <description>somatic SNP and indel caller</description>

  <!-- External macro file; the inputs section expands a macro named "reference_loc". -->
  <macros>
    <import>mutect2_macros_add_loc.xml</import>
  </macros>

  <!--
    Packages resolved for the job environment.
    The picard version matters: the command section addresses the jar through a
    versioned directory name (picard-2.7.1-2), so bumping it here requires
    updating that path as well.
  -->
  <requirements>
    <requirement type="package" version="3.8">gatk</requirement>
    <requirement type="package" version="2.7.1">picard</requirement>
    <requirement type="package" version="1.7">samtools</requirement>
  </requirements>
  <command>
    <![CDATA[
      ## ------------------------------------------------------------------
      ## Command template (Cheetah + bash). Galaxy joins the rendered lines
      ## with spaces, so every shell command except the last must end in &&.
      ## All diagnostics are APPENDED to the log dataset so that no step
      ## erases the messages written by an earlier one.
      ## ------------------------------------------------------------------

      ## Stage the BAM inputs under fixed names: MuTect2 insists on the .bam
      ## extension, which Galaxy dataset paths do not carry.
      ln -s '$input1' tumor.bam &&
      ln -s '$input2' normal.bam &&

      ## Index the staged links, not the datasets themselves, so the .bai
      ## files land in the job working directory instead of the dataset store.
      ## -@ allocates additional threads.
      samtools index -@ \${GALAXY_SLOTS:-4} tumor.bam &&
      samtools index -@ \${GALAXY_SLOTS:-4} normal.bam &&

      ## A reference taken from the history has neither a .fai index nor a
      ## .dict dictionary; build both next to the staged link.
      ## NOTE(review): the picard jar path embeds the conda build string
      ## (picard-2.7.1-2) and must follow the picard requirement version.
      #if $reference_source.reference_source_selector == "history"
      ln -s '$reference_source.ref_file_h' genome.fa &&
      samtools faidx genome.fa &&
      java -jar \$CONDA_PREFIX/share/picard-2.7.1-2/picard.jar CreateSequenceDictionary R=genome.fa O=genome.dict 2>> '$log' &&
      #end if

      ## Optional inputs: link each one under a name with the expected extension.
      #if $list
      ln -s '$list' position.bed &&
      #end if
      #if $dbSNP
      ln -s '$dbSNP' dbSNP.vcf &&
      #end if
      #if $cosmic
      ln -s '$cosmic' cosmic.vcf &&
      #end if
      #if $alleles
      ln -s '$alleles' alleles.vcf &&
      #end if

      ## mv_untar_gatk.sh unpacks GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 and
      ## moves the jar to the location used by the java call below.
      bash '$__tool_directory__/mv_untar_gatk.sh' >> '$log' 2>&1 &&

      java -jar \$CONDA_PREFIX/../../GenomeAnalysisTK.jar
        -nct \${GALAXY_SLOTS:-4}
        -T MuTect2
        -I:tumor tumor.bam
        -I:normal normal.bam
        -o '$output'

      ## Reference: staged history file or the path from the cached data table.
      #if $reference_source.reference_source_selector == "history"
        -R genome.fa
      #elif $reference_source.reference_source_selector == "cached"
        -R '$reference_source.ref_file.fields.path'
      #end if

      ## Optional inputs: the option is added only when the dataset was supplied.
      #if $dbSNP
        --dbsnp dbSNP.vcf
      #end if
      #if $cosmic
        --cosmic cosmic.vcf
      #end if
      #if $list
        -L position.bed
      #end if
      #if $alleles
        --alleles alleles.vcf
      #end if

      ## Advanced options: an option is added only when its value differs from
      ## the default. Every value lives inside the "advanced" conditional and
      ## must be addressed with the advanced. prefix.
      #if str($advanced.advanced_parameters) == "show"
        #if $advanced.heterozygosity != "0.001"
          --heterozygosity $advanced.heterozygosity
        #end if
        #if $advanced.heterozygosity_stdev != "0.01"
          --heterozygosity_stdev $advanced.heterozygosity_stdev
        #end if
        #if $advanced.indel_heterozygosity != "1.25E-4"
          --indel_heterozygosity '$advanced.indel_heterozygosity'
        #end if
        #if $advanced.initial_normal_lod != "0.5"
          --initial_normal_lod $advanced.initial_normal_lod
        #end if
        #if $advanced.initial_tumor_lod != "4.0"
          --initial_tumor_lod $advanced.initial_tumor_lod
        #end if
        #if $advanced.max_alt_allele_in_normal_fraction != "0.03"
          --max_alt_allele_in_normal_fraction $advanced.max_alt_allele_in_normal_fraction
        #end if
        #if $advanced.max_alt_alleles_in_normal_count != "1"
          --max_alt_alleles_in_normal_count $advanced.max_alt_alleles_in_normal_count
        #end if
        #if $advanced.max_alt_alleles_in_normal_qscore_sum != "20"
          --max_alt_alleles_in_normal_qscore_sum $advanced.max_alt_alleles_in_normal_qscore_sum
        #end if
        #if $advanced.maxReadsInRegionPerSample != "1000"
          --maxReadsInRegionPerSample $advanced.maxReadsInRegionPerSample
        #end if
        #if $advanced.min_base_quality_score != "10"
          --min_base_quality_score $advanced.min_base_quality_score
        #end if
        #if $advanced.minReadsPerAlignmentStart != "5"
          --minReadsPerAlignmentStart $advanced.minReadsPerAlignmentStart
        #end if
        #if $advanced.normal_lod != "2.2"
          --normal_lod $advanced.normal_lod
        #end if
        #if $advanced.pir_mad_threshold != "3.0"
          --pir_mad_threshold $advanced.pir_mad_threshold
        #end if
        #if $advanced.pir_median_threshold != "10.0"
          --pir_median_threshold $advanced.pir_median_threshold
        #end if
        #if $advanced.power_constant_qscore != "30"
          --power_constant_qscore $advanced.power_constant_qscore
        #end if
        #if $advanced.sample_ploidy != "2"
          --sample_ploidy $advanced.sample_ploidy
        #end if
        #if $advanced.standard_min_confidence_threshold_for_calling != "10.0"
          --standard_min_confidence_threshold_for_calling $advanced.standard_min_confidence_threshold_for_calling
        #end if
        #if $advanced.tumor_lod != "6.3"
          --tumor_lod $advanced.tumor_lod
        #end if
        #if $advanced.contamination_fraction_to_filter != "0.0"
          --contamination_fraction_to_filter $advanced.contamination_fraction_to_filter
        #end if
        #if $advanced.dbsnp_normal_lod != "5.5"
          --dbsnp_normal_lod $advanced.dbsnp_normal_lod
        #end if
        #if $advanced.debug_read_name != ""
          --debug_read_name '$advanced.debug_read_name'
        #end if
        #if $advanced.genotyping_mode != "DISCOVERY"
          --genotyping_mode $advanced.genotyping_mode
        #end if
        #if $advanced.group
          --group '$advanced.group'
        #end if
      #end if

      ## Optional outputs: each is requested only when its selector is "yes".
      #if str($optional_out1.outFile1) == "yes"
        --activeRegionOut '$activeRegionOut_output'
      #end if
      #if str($optional_out2.outFile2) == "yes"
        --activityProfileOut '$activityProfileOut_output'
      #end if
      #if str($optional_out3.outFile3) == "yes"
        --graphOutput '$graphOutput_output'
      #end if
      #if str($optional_out4.outFile4) == "yes"
        --bamOutput '$bamOutput_output'
      #end if

      ## Standard error of the MuTect2 run is appended to the log dataset.
      2>> '$log'
    ]]></command>
  <inputs>
    <!-- Reference genome selector, expanded from the imported macro file. -->
    <expand macro="reference_loc"/>

    <!-- Mandatory tumor/normal BAM pair. -->
    <param format="bam" name="input1" type="data" label="tumor bam" help="bamfile"/>
    <param format="bam" name="input2" type="data" label="normal bam" help="bamfile"/>

    <!-- Optional resources; the command adds the matching option only when a dataset is supplied. -->
    <param format="vcf" name="dbSNP" type="data" optional="true" label="dbsnp file.vcf" help="vcf file"/>
    <param format="vcf" name="cosmic" type="data" optional="true" label="cosmic file.vcf" help="vcf file"/>
    <param format="bed" name="list" type="data" optional="true" label="position list" help="bed file"/>
    <param format="vcf" name="alleles" type="data" optional="true" label="set of alleles use in genotyping" help="vcf file"/>

    <!--
      Advanced MuTect2 options. The command compares every value with the
      textual default given here and passes the option only when it differs,
      so the "value" attributes must stay in sync with the command section.
      Whole-number options are typed as integer so Galaxy validates them.
      indel_heterozygosity stays a text parameter because the command compares
      it with the literal string 1.25E-4.
    -->
    <conditional name="advanced">
      <param name="advanced_parameters" type="select" label="advanced_parameters">
        <option value="hide" selected="true">Hide</option>
        <option value="show">Show</option>
      </param>
      <when value="hide"/>
      <when value="show">
        <param name="heterozygosity" type="float" optional="true" value="0.001" help="Heterozygosity value used to compute prior likelihoods for any locus"/>
        <param name="heterozygosity_stdev" type="float" optional="true" value="0.01" help="Standard deviation of heterozygosity for SNP and indel calling"/>
        <param name="indel_heterozygosity" type="text" optional="true" value="1.25E-4" help="Heterozygosity for indel calling"/>
        <param name="initial_normal_lod" type="float" optional="true" value="0.5" help="Initial LOD threshold for calling normal variant"/>
        <param name="initial_tumor_lod" type="float" optional="true" value="4.0" help="Initial LOD threshold for calling tumor variant"/>
        <param name="max_alt_allele_in_normal_fraction" type="float" optional="true" value="0.03" help="Threshold for maximum alternate allele fraction in normal"/>
        <param name="max_alt_alleles_in_normal_count" type="integer" optional="true" value="1" help="Threshold for maximum alternate allele counts in normal"/>
        <param name="max_alt_alleles_in_normal_qscore_sum" type="integer" optional="true" value="20" help="Threshold for maximum alternate allele quality score sum in normal"/>
        <param name="maxReadsInRegionPerSample" type="integer" optional="true" value="1000" help="Maximum reads in an active region"/>
        <param name="min_base_quality_score" type="integer" optional="true" value="10" help="Minimum base quality required to consider a base for calling"/>
        <param name="minReadsPerAlignmentStart" type="integer" optional="true" value="5" help="Minimum number of reads sharing the same alignment start for each genomic location in an active region"/>
        <param name="normal_lod" type="float" optional="true" value="2.2" help="LOD threshold for calling normal non-germline"/>
        <param name="pir_mad_threshold" type="float" optional="true" value="3.0" help="threshold for clustered read position artifact MAD"/>
        <param name="pir_median_threshold" type="float" optional="true" value="10.0" help="threshold for clustered read position artifact median"/>
        <param name="power_constant_qscore" type="integer" optional="true" value="30" help="Phred scale quality score constant to use in power calculations"/>
        <param name="sample_ploidy" type="integer" optional="true" value="2" help="ploidy per sample"/>
        <param name="standard_min_confidence_threshold_for_calling" type="float" optional="true" value="10.0" help="The minimum phred-scaled confidence threshold at which variants should be called"/>
        <param name="tumor_lod" type="float" optional="true" value="6.3" help="LOD threshold for calling tumor variant"/>
        <param name="contamination_fraction_to_filter" type="float" optional="true" value="0.0" help="Fraction of contamination to aggressively remove"/>
        <param name="dbsnp_normal_lod" type="float" optional="true" value="5.5" help="LOD threshold for calling normal non-variant at dbsnp sites"/>
        <param name="debug_read_name" type="text" optional="true" value="" help="trace this read name through the calling process"/>
        <param name="genotyping_mode" type="select" optional="true" help="Specifies how to determine the alternate alleles to use for genotyping">
          <option value="DISCOVERY" selected="true">DISCOVERY</option>
          <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
        </param>
        <param name="group" type="text" optional="true" help="one or more classes, groups of annotation to apply to variant call"/>
      </when>
    </conditional>

    <!--
      One yes/no selector per optional output. Each selector drives both the
      matching command-line option and the filter on the corresponding output
      dataset.
    -->
    <conditional name="optional_out1">
      <param name="outFile1" type="select" label="activeRegionOut">
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no"/>
      <when value="yes"/>
    </conditional>
    <conditional name="optional_out2">
      <param name="outFile2" type="select" label="activityprofileOut">
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no"/>
      <when value="yes"/>
    </conditional>
    <conditional name="optional_out3">
      <param name="outFile3" type="select" label="graphOutput">
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no"/>
      <when value="yes"/>
    </conditional>
    <conditional name="optional_out4">
      <param name="outFile4" type="select" label="Bamoutput">
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no"/>
      <when value="yes"/>
    </conditional>
  </inputs>
  <outputs>
    <!-- Always produced: the variant calls and the collected stderr log. -->
    <data format="vcf" name="output" label="${tool.name} on ${on_string}"/>
    <data format="txt" name="log" label="${tool.name} on ${on_string} :log"/>

    <!--
      Optional outputs. Each dataset is created only when the matching
      selector in the inputs section is set to "yes"; the filter alone
      controls this, so no extra attribute is needed on the data element.
    -->
    <data format="txt" name="activeRegionOut_output" label="${tool.name} on ${on_string} :activeRegionOut">
      <filter>optional_out1['outFile1'] == 'yes'</filter>
    </data>
    <data format="txt" name="activityProfileOut_output" label="${tool.name} on ${on_string} :activityProfileOut">
      <filter>optional_out2['outFile2'] == 'yes'</filter>
    </data>
    <data format="txt" name="graphOutput_output" label="${tool.name} on ${on_string} :graphOutput">
      <filter>optional_out3['outFile3'] == 'yes'</filter>
    </data>
    <!-- The bamOutput option writes alignments, so the dataset is typed as bam rather than txt. -->
    <data format="bam" name="bamOutput_output" label="${tool.name} on ${on_string} :bamOutput">
      <filter>optional_out4['outFile4'] == 'yes'</filter>
    </data>
  </outputs>
  <tests>
    <!--
      Smoke test with a reference taken from the history.
      The history reference is set through ref_file_h because that is the name
      the command section reads for the "history" branch.
      NOTE(review): the parameter is declared in the imported macro file;
      confirm the name there.
    -->
    <test>
      <conditional name="reference_source">
        <param name="reference_source_selector" value="history"/>
        <param name="ref_file_h" value="test_fasta.fa" ftype="fasta"/>
      </conditional>
      <param name="input1" value="mutect2_test_tumoral2.bam" ftype="bam"/>
      <param name="input2" value="mutect2_test_normal2.bam" ftype="bam"/>
      <!-- Minimal check that a VCF was written: the column header line must be present. -->
      <output name="output" ftype="vcf">
        <assert_contents>
          <has_text text="#CHROM"/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <help>
    **IMPORTANT** Before this wrapper can be used, a Galaxy administrator has to download GATK 3.8-0-ge9d806836 from the Broad Institute archive (https://software.broadinstitute.org/gatk/download/archive) and move it into the conda_prefix folder.
    The path of the conda_prefix folder is set in the galaxy.ini (or galaxy.yml) file.

    MuTect2 is a somatic SNP and indel caller that combines the DREAM challenge-winning somatic genotyping engine of the original MuTect (Cibulskis et al., 2013) with the assembly-based machinery of HaplotypeCaller.
    Galaxy wrapper for MuTect2 implements most but not all options available through the command line. Supported options are described below.

       **Optional Inputs**

          + --alleles none Set of alleles to use in genotyping
          + --cosmic [] VCF file of COSMIC sites
          + --dbsnp none dbSNP file
          + --activityProfileOut NA Output the raw activity profile results in IGV format
          + --graphOutput NA Write debug assembly graph information to this file

       **Optional Parameters**

          + --contamination_fraction_to_filter 0.0 Fraction of contamination to aggressively remove
          + --dbsnp_normal_lod 5.5 LOD threshold for calling normal non-variant at dbsnp sites
          + --debug_read_name NA trace this read name through the calling process
          + --genotyping_mode DISCOVERY Specifies how to determine the alternate alleles to use for genotyping
          + --group [] One or more classes/groups of annotations to apply to variant calls
          + --heterozygosity 0.001 Heterozygosity value used to compute prior likelihoods for any locus
          + --heterozygosity_stdev 0.01 Standard deviation of heterozygosity for SNP and indel calling
          + --indel_heterozygosity 1.25E-4 Heterozygosity for indel calling
          + --initial_normal_lod 0.5 Initial LOD threshold for calling normal variant
          + --initial_tumor_lod 4.0 Initial LOD threshold for calling tumor variant
          + --max_alt_allele_in_normal_fraction 0.03 Threshold for maximum alternate allele fraction in normal
          + --max_alt_alleles_in_normal_count 1 Threshold for maximum alternate allele counts in normal
          + --max_alt_alleles_in_normal_qscore_sum 20 Threshold for maximum alternate allele quality score sum in normal
          + --maxReadsInRegionPerSample 1000 Maximum reads in an active region
          + --min_base_quality_score 10 Minimum base quality required to consider a base for calling
          + --minReadsPerAlignmentStart 5 Minimum number of reads sharing the same alignment start for each genomic location in an active region
          + --normal_lod 2.2 LOD threshold for calling normal non-germline
          + --pir_mad_threshold 3.0 threshold for clustered read position artifact MAD
          + --pir_median_threshold 10.0 threshold for clustered read position artifact median
          + --power_constant_qscore 30 Phred scale quality score constant to use in power calculations
          + --sample_ploidy 2 Ploidy per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).
          + --standard_min_confidence_threshold_for_calling 10.0 The minimum phred-scaled confidence threshold at which variants should be called
          + --tumor_lod 6.3 LOD threshold for calling tumor variant

       **Advanced Outputs**

          + --bamOutput
          + --activeRegionOut
          + --activityProfileOut
          + --graphOutput

    more information at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_cancer_m2_MuTect2.php
    </help>
    <citations>
      <!-- Publication cited for this tool, referenced by DOI; presumably the original MuTect paper (Cibulskis et al., 2013) named in the help text. TODO confirm. -->
      <citation type="doi">10.1038/nbt.2514</citation>
    </citations>
</tool>