diff variant_apply_recalibration.xml @ 0:c06c30bfcf1b draft default tip

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 09:11:48 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_apply_recalibration.xml	Tue Apr 01 09:11:48 2014 -0400
@@ -0,0 +1,139 @@
+<tool id="gatk_variant_apply_recalibration" name="Apply Variant Recalibration" version="0.0.4">
+  <description></description>
+  <requirements>
+      <requirement type="package" version="1.4">gatk</requirement>
+  </requirements>
+  <macros>
+    <import>gatk_macros.xml</import>
+  </macros>
+  <command interpreter="python">gatk_wrapper.py
+   --max_jvm_heap_fraction "1"
+   --stdout "${output_log}"
+   #for $var_count, $variant in enumerate( $reference_source.variants ):
+      -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}"
+   #end for
+   -p 'java 
+    -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
+    -T "ApplyRecalibration"
+    ##--num_threads 4 ##hard coded, for now
+    -et "NO_ET" ##ET no phone home
+    #if $reference_source.reference_source_selector != "history":
+        -R "${reference_source.ref_file.fields.path}"
+    #end if
+    --recal_file "${reference_source.input_recal}"
+    --tranches_file "${reference_source.input_tranches}"
+    --out "${output_variants}"
+   '
+    
+    #include source=$standard_gatk_options#
+    
+    ##start analysis specific options
+    -p '
+    --mode "${mode}"
+    
+    #for $ignore_filter in $ignore_filters:
+        #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector )
+        #if $ignore_filter_name == "custom":
+          #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name )
+        #end if
+        --ignore_filter "${ignore_filter_name}"
+    #end for
+    --ts_filter_level "${ts_filter_level}"
+    '
+  </command>
+  <inputs>
+    <conditional name="reference_source">
+      <expand macro="reference_source_selector_param" />
+      <when value="cached">
+        <repeat name="variants" title="Variant" min="1" help="-input,--input &amp;lt;input&amp;gt;">
+          <param name="input_variants" type="data" format="vcf" label="Variant file to annotate"/>
+        </repeat>
+        <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
+        <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &amp;lt;tranches_file&amp;gt;" />
+        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
+          <options from_data_table="gatk_picard_indexes">
+            <!-- <filter type="data_meta" key="dbkey" ref="variants[0].input_variants" column="dbkey"/> -->
+          </options>
+          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> <!-- FIX ME!!!! -->
+        <repeat name="variants" title="Variant" min="1" help="-input,--input &amp;lt;input&amp;gt;">
+          <param name="input_variants" type="data" format="vcf" label="Variant file to annotate" />
+        </repeat>
+        <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
+        <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &amp;lt;tranches_file&amp;gt;" />
+        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
+      </when>
+    </conditional>
+    
+    <expand macro="gatk_param_type_conditional" />
+    
+        <param name="mode" type="select" label="Recalibration mode" help="-mode,--mode &amp;lt;mode&amp;gt;">
+          <option value="SNP" selected="True">SNP</option>
+          <option value="INDEL">INDEL</option>
+          <option value="BOTH">BOTH</option>
+        </param>
+       <repeat name="ignore_filters" title="Ignore Filter" help="-ignoreFilter,--ignore_filter &amp;lt;ignore_filter&amp;gt;">
+          <conditional name="ignore_filter_type">
+            <param name="ignore_filter_type_selector" type="select" label="Filter Type">
+              <option value="HARD_TO_VALIDATE">HARD_TO_VALIDATE</option>
+              <option value="LowQual" >LowQual</option>
+              <option value="custom" selected="True">Other</option>
+            </param>
+            <when value="custom">
+              <param name="filter_name" type="text" value="" label="Filter name"/>
+            </when>
+            <when value="HARD_TO_VALIDATE" />
+            <when value="LowQual" />
+          </conditional>
+        </repeat>
+    <param name="ts_filter_level" type="float" label="truth sensitivity level at which to start filtering, used here to indicate filtered variants in plots" value="99.0" help="-ts_filter_level,--ts_filter_level &amp;lt;ts_filter_level&amp;gt;"/>
+  </inputs>
+  <outputs>
+    <data format="vcf" name="output_variants" label="${tool.name} on ${on_string} (Variants File)" />
+    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
+  </outputs>
+  <tests>
+      <!-- ADD TESTS -->
+  </tests>
+  <help>
+**What it does**
+
+Applies cuts to the input vcf file (by adding filter lines) to achieve the desired novel FDR levels which were specified during VariantRecalibration
+
+For more information on using the ApplyRecalibration module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration&gt;`_.
+
+To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
+
+If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
+
+------
+
+**Inputs**
+
+GenomeAnalysisTK: ApplyRecalibration accepts a variant input file, a recalibration file and a tranches file.
+
+
+**Outputs**
+
+The output is in VCF format.
+
+
+Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
+
+-------
+
+**Settings**::
+
+
+ recal_file         The output recal file used by ApplyRecalibration
+ tranches_file      The input tranches file describing where to cut the data
+ out                The output filtered, recalibrated VCF file
+ ts_filter_level    The truth sensitivity level at which to start filtering
+ ignore_filter      If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file
+ mode               Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously. (SNP|INDEL|BOTH)
+
+@CITATION_SECTION@
+  </help>
+</tool>