Mercurial > repos > devteam > variant_apply_recalibration
diff variant_apply_recalibration.xml @ 0:c06c30bfcf1b draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 09:11:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_apply_recalibration.xml Tue Apr 01 09:11:48 2014 -0400
@@ -0,0 +1,149 @@
+<tool id="gatk_variant_apply_recalibration" name="Apply Variant Recalibration" version="0.0.4">
+    <!-- Galaxy wrapper that invokes the GATK ApplyRecalibration walker through gatk_wrapper.py. -->
+    <description></description>
+    <requirements>
+        <requirement type="package" version="1.4">gatk</requirement>
+    </requirements>
+    <!-- The expand macros and the standard_gatk_options include used below are not defined in this file; presumably they come from gatk_macros.xml. -->
+    <macros>
+        <import>gatk_macros.xml</import>
+    </macros>
+    <command interpreter="python">gatk_wrapper.py
+        --max_jvm_heap_fraction "1"
+        --stdout "${output_log}"
+        ## One -d argument group per input VCF; the loop index keeps each input tag and name unique.
+        #for $var_count, $variant in enumerate( $reference_source.variants ):
+            -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}"
+        #end for
+        -p 'java
+            -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
+            -T "ApplyRecalibration"
+            ##--num_threads 4 ##hard coded, for now
+            -et "NO_ET" ##ET no phone home
+            ## Only a built-in (non-history) reference is passed by path here.
+            ## NOTE(review): a history reference is presumably handled by standard_gatk_options - confirm in gatk_macros.xml.
+            #if $reference_source.reference_source_selector != "history":
+                -R "${reference_source.ref_file.fields.path}"
+            #end if
+            --recal_file "${reference_source.input_recal}"
+            --tranches_file "${reference_source.input_tranches}"
+            --out "${output_variants}"
+        '
+
+        #include source=$standard_gatk_options#
+
+        ##start analysis specific options
+        -p '
+            --mode "${mode}"
+
+            ## Each repeat entry contributes one ignore_filter argument: the preset name, or the typed name when "custom" is selected.
+            #for $ignore_filter in $ignore_filters:
+                #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector )
+                #if $ignore_filter_name == "custom":
+                    #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name )
+                #end if
+                --ignore_filter "${ignore_filter_name}"
+            #end for
+            --ts_filter_level "${ts_filter_level}"
+        '
+    </command>
+    <inputs>
+        <!-- Both branches define the same variants / input_recal / input_tranches params; only ref_file differs (data-table select vs. FASTA dataset from the history). -->
+        <conditional name="reference_source">
+            <expand macro="reference_source_selector_param" />
+            <when value="cached">
+                <repeat name="variants" title="Variant" min="1" help="-input,--input &lt;input&gt;">
+                    <param name="input_variants" type="data" format="vcf" label="Variant file to annotate"/>
+                </repeat>
+                <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &lt;recal_file&gt;" />
+                <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &lt;tranches_file&gt;" />
+                <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;">
+                    <options from_data_table="gatk_picard_indexes">
+                        <!-- <filter type="data_meta" key="dbkey" ref="variants[0].input_variants" column="dbkey"/> -->
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history"> <!-- FIX ME!!!! -->
+                <repeat name="variants" title="Variant" min="1" help="-input,--input &lt;input&gt;">
+                    <param name="input_variants" type="data" format="vcf" label="Variant file to annotate" />
+                </repeat>
+                <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &lt;recal_file&gt;" />
+                <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &lt;tranches_file&gt;" />
+                <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" />
+            </when>
+        </conditional>
+
+        <expand macro="gatk_param_type_conditional" />
+
+        <param name="mode" type="select" label="Recalibration mode" help="-mode,--mode &lt;mode&gt;">
+            <option value="SNP" selected="True">SNP</option>
+            <option value="INDEL">INDEL</option>
+            <option value="BOTH">BOTH</option>
+        </param>
+        <repeat name="ignore_filters" title="Ignore Filter" help="-ignoreFilter,--ignore_filter &lt;ignore_filter&gt;">
+            <conditional name="ignore_filter_type">
+                <param name="ignore_filter_type_selector" type="select" label="Filter Type">
+                    <option value="HARD_TO_VALIDATE">HARD_TO_VALIDATE</option>
+                    <option value="LowQual" >LowQual</option>
+                    <option value="custom" selected="True">Other</option>
+                </param>
+                <when value="custom">
+                    <param name="filter_name" type="text" value="" label="Filter name">
+                        <!-- "Other" is the default choice, so reject a blank name rather than emitting an empty ignore_filter value. -->
+                        <validator type="empty_field" message="Enter the name of the filter to ignore"/>
+                    </param>
+                </when>
+                <when value="HARD_TO_VALIDATE" />
+                <when value="LowQual" />
+            </conditional>
+        </repeat>
+        <param name="ts_filter_level" type="float" label="Truth sensitivity level at which to start filtering" value="99.0" help="-ts_filter_level,--ts_filter_level &lt;ts_filter_level&gt;"/>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output_variants" label="${tool.name} on ${on_string} (Variants File)" />
+        <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
+    </outputs>
+    <tests>
+        <!-- ADD TESTS -->
+    </tests>
+    <help>
+**What it does**
+
+Applies cuts to the input vcf file (by adding filter lines) to achieve the desired novel FDR levels which were specified during VariantRecalibration
+
+For more information on using the ApplyRecalibration module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration>`_.
+
+To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_.
+
+If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_.
+
+------
+
+**Inputs**
+
+GenomeAnalysisTK: ApplyRecalibration accepts a variant input file, a recalibration file and a tranches file.
+
+
+**Outputs**
+
+The output is in VCF format.
+
+
+Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats.
+
+-------
+
+**Settings**::
+
+
+ recal_file         The input recal file used by ApplyRecalibration
+ tranches_file      The input tranches file describing where to cut the data
+ out                The output filtered, recalibrated VCF file
+ ts_filter_level    The truth sensitivity level at which to start filtering
+ ignore_filter      If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file
+ mode               Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously. (SNP|INDEL|BOTH)
+
+@CITATION_SECTION@
+    </help>
+</tool>