view variant_apply_recalibration.xml @ 0:c06c30bfcf1b draft default tip

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 09:11:48 -0400
parents
children
line wrap: on
line source

<tool id="gatk_variant_apply_recalibration" name="Apply Variant Recalibration" version="0.0.4">
  <!-- No short description is shown next to the tool name. -->
  <description/>
  <!-- Dependency: the "gatk" package, version 1.4. -->
  <requirements>
    <requirement version="1.4" type="package">gatk</requirement>
  </requirements>
  <!-- Macro definitions used by the expand elements below are imported from gatk_macros.xml. -->
  <macros>
    <import>gatk_macros.xml</import>
  </macros>
  <!--
    Command line, written as a Cheetah template and run through gatk_wrapper.py.
    It is assembled from, in order:
      * one -d entry per VCF in the "variants" repeat,
      * a -p fragment holding the base java / ApplyRecalibration invocation,
      * whatever the standard_gatk_options template contributes
        (NOTE(review): presumably defined in gatk_macros.xml; confirm),
      * a second -p fragment holding the analysis-specific options.
    Keep documentation inside the command body as Cheetah "##" comments only;
    XML comments inside the element text can truncate the command.
  -->
  <command interpreter="python">gatk_wrapper.py
   --max_jvm_heap_fraction "1"
   --stdout "${output_log}"
   ## one -d entry per input VCF; the repeat index keeps the binding names unique
   #for $var_count, $variant in enumerate( $reference_source.variants ):
      -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}"
   #end for
   -p 'java 
    -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
    -T "ApplyRecalibration"
    ##--num_threads 4 ##hard coded, for now
    -et "NO_ET" ##ET no phone home
    ## str() makes this a plain string comparison, matching how the ignore-filter selector is read below
    ## NOTE(review): no -R is emitted here for a history reference; presumably standard_gatk_options supplies it, confirm
    #if str( $reference_source.reference_source_selector ) != "history":
        -R "${reference_source.ref_file.fields.path}"
    #end if
    --recal_file "${reference_source.input_recal}"
    --tranches_file "${reference_source.input_tranches}"
    --out "${output_variants}"
   '
    
    #include source=$standard_gatk_options#
    
    ##start analysis specific options
    -p '
    --mode "${mode}"
    
    #for $ignore_filter in $ignore_filters:
        ## predefined choices use the selector value itself as the filter name
        #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector )
        #if $ignore_filter_name == "custom":
          #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name ).strip()
        #end if
        ## a custom entry left blank names no filter, so emit nothing for it
        #if $ignore_filter_name != "":
        --ignore_filter "${ignore_filter_name}"
        #end if
    #end for
    --ts_filter_level "${ts_filter_level}"
    '
  </command>
  <inputs>
    <!--
      Reference genome source. Both cases declare the same variant, recal and
      tranches inputs; they differ only in how ref_file is chosen.
    -->
    <conditional name="reference_source">
      <expand macro="reference_source_selector_param" />
      <!-- Built-in reference picked from the gatk_picard_indexes data table. -->
      <when value="cached">
        <repeat name="variants" title="Variant" min="1" help="-input,--input &amp;lt;input&amp;gt;">
          <param name="input_variants" type="data" format="vcf" label="Variant file to recalibrate"/>
        </repeat>
        <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
        <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &amp;lt;tranches_file&amp;gt;" />
        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
          <options from_data_table="gatk_picard_indexes">
            <!-- <filter type="data_meta" key="dbkey" ref="variants[0].input_variants" column="dbkey"/> -->
          </options>
          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <!-- Reference supplied as a FASTA dataset from the history. -->
      <when value="history"> <!-- FIX ME!!!! -->
        <repeat name="variants" title="Variant" min="1" help="-input,--input &amp;lt;input&amp;gt;">
          <param name="input_variants" type="data" format="vcf" label="Variant file to recalibrate" />
        </repeat>
        <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
        <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &amp;lt;tranches_file&amp;gt;" />
        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
      </when>
    </conditional>

    <expand macro="gatk_param_type_conditional" />

    <!-- Analysis-specific options; these feed the second -p fragment of the command. -->
    <param name="mode" type="select" label="Recalibration mode" help="-mode,--mode &amp;lt;mode&amp;gt;">
      <option value="SNP" selected="True">SNP</option>
      <option value="INDEL">INDEL</option>
      <option value="BOTH">BOTH</option>
    </param>
    <!-- Each entry names one filter to ignore: a predefined name, or free text when "Other" is chosen. -->
    <repeat name="ignore_filters" title="Ignore Filter" help="-ignoreFilter,--ignore_filter &amp;lt;ignore_filter&amp;gt;">
      <conditional name="ignore_filter_type">
        <param name="ignore_filter_type_selector" type="select" label="Filter Type">
          <option value="HARD_TO_VALIDATE">HARD_TO_VALIDATE</option>
          <option value="LowQual" >LowQual</option>
          <option value="custom" selected="True">Other</option>
        </param>
        <when value="custom">
          <param name="filter_name" type="text" value="" label="Filter name"/>
        </when>
        <when value="HARD_TO_VALIDATE" />
        <when value="LowQual" />
      </conditional>
    </repeat>
    <!-- Label matches the ts_filter_level description in the help section; this tool produces no plots. -->
    <param name="ts_filter_level" type="float" label="Truth sensitivity level at which to start filtering" value="99.0" help="-ts_filter_level,--ts_filter_level &amp;lt;ts_filter_level&amp;gt;"/>
  </inputs>
  <!-- Two history datasets: the VCF written by the out option, and the file given to the wrapper's stdout option. -->
  <outputs>
    <data name="output_variants" format="vcf" label="${tool.name} on ${on_string} (Variants File)"/>
    <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)"/>
  </outputs>
  <tests>
      <!-- TODO: add functional tests; none are defined for this tool yet. -->
  </tests>
  <help>
**What it does**

Applies cuts to the input VCF file (by adding filter lines) to achieve the desired novel FDR levels that were specified during VariantRecalibration.

For more information on using the ApplyRecalibration module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.

------

**Inputs**

GenomeAnalysisTK: ApplyRecalibration accepts a variant input file, a recalibration file and a tranches file.


**Outputs**

The output is in VCF format.


Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.

-------

**Settings**::


 recal_file         The input recal file used by ApplyRecalibration
 tranches_file      The input tranches file describing where to cut the data
 out                The output filtered, recalibrated VCF file
 ts_filter_level    The truth sensitivity level at which to start filtering
 ignore_filter      If specified, the optimizer will use variants even if the specified filter name is marked in the input VCF file
 mode               Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously. (SNP|INDEL|BOTH)

@CITATION_SECTION@
  </help>
</tool>