view covacs_VariantRecalibrator.xml @ 10:2a59f20b097f draft default tip

Uploaded
author elixir-it
date Wed, 02 Oct 2019 04:31:44 -0400
parents 3b3475d221a4
children
line wrap: on
line source

 <tool id="covacs_VarianRecalibrator" name="covacs_VariantRecalibrator" version="3.8">
  <description>GATK VariantRecalibrator wrapper Version = 3.8</description>
  <macros>
	<import>bed_macros.xml</import>
	<import>vcf_macros.xml</import>
  </macros>
  <requirements>
  	<requirement type="package" version="3.8" >gatk</requirement>
  </requirements>
  <command>
    <![CDATA[
        ## Cheetah template that renders the shell command for GATK 3.8 VariantRecalibrator.
        ## Lines starting with a double hash are Cheetah comments and are not part of the rendered command.
        ## Galaxy-provided paths are single-quoted so the shell treats each one as a single word.

        ## Run the helper script shipped next to this wrapper (it untars the GATK package).
        ## This step creates the log dataset and stores the script's stdout and stderr in it.
        bash '$__tool_directory__/mv_untar_gatk.sh' > '$log' 2>&1 &&

        ## Symlink the BED file chosen from the history, if any, under a fixed local name.
        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
            ln -s '$bed_source.bed_history' region.bed &&
        #end if

        ## Symlink the input VCF under a fixed local name.
        ln -s '$input1' input1.vcf &&

        ## GATK tool call; the dollar sign is escaped so CONDA_PREFIX is expanded by the shell, not by Cheetah.
        java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar" -T VariantRecalibrator

        ## Optional interval restriction: either the linked history BED or a cached BED path.
        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
            -L region.bed
        #end if
        #if $bed_source.bed_source_selector == "cached"
            -L '$bed_source.bed_cached.fields.path'
        #end if

        -ip $ip

        ## Reference genome path taken from the covacs_gatk_indexes data table.
        -R '$ref_file.fields.path'

        ## VCF input parameter
        -input input1.vcf

        ## One resource argument per entry of the "resource" repeat; the VCF comes
        ## either from the history or from a cached location.
        #for $r in $resource
            #if $r.vcf_source.vcf_source_selector == "history" and $r.vcf_source.vcf_history
                --resource:${r.nameresource},known=${r.known.value},training=${r.training.value},truth=${r.truth.value},prior=${r.prior.value} '${r.vcf_source.vcf_history}'
            #end if
            #if $r.vcf_source.vcf_source_selector == "cached"
                --resource:${r.nameresource},known=${r.known.value},training=${r.training.value},truth=${r.truth.value},prior=${r.prior.value} '${r.vcf_source.vcf_cached.fields.path}'
            #end if
        #end for

        ## Recalibration mode (SNP or INDEL); the extra model options are only exposed for INDEL.
        -mode $mode_type.mode
        #if $mode_type.mode == "INDEL"
            --minNumBadVariants $mode_type.minNumBadVariants
            --maxGaussians $mode_type.maxGaussian
            -mNG $mode_type.mNG
        #end if

        ## One -an argument per entry of the "an" repeat.
        #for $a in $an
            -an ${a.an_name.value}
        #end for

        ## One -tranche argument per entry of the "tranches_name" repeat.
        #for $t in $tranches_name
            -tranche ${t.tranches.value}
        #end for

        ## Outputs are written directly to the Galaxy dataset paths.
        -recalFile '$recal'
        -tranchesFile '$tranches'

        ## Append GATK's stderr to the log instead of truncating it, so the output
        ## of the helper script written at the start is preserved.
        2>> '$log'
    ]]>
  </command>
  <inputs>
    <!-- Reference genome, selected from the covacs_gatk_indexes data table; passed to GATK as -R. -->
    <param name="ref_file" type="select" label="Using indexed reference genome" help="Select indexed genome from the list">
      <options from_data_table="covacs_gatk_indexes">
        <filter type="sort_by" column="2" />
        <validator type="no_options" message="No indexes are available" />
      </options>
      <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
    </param>
    <!-- Required: the command always symlinks this dataset and passes it as -input. -->
    <param format="vcf" name="input1" label="VCF of raw input variants to be recalibrated" type="data" optional="false" />
    <!-- BED interval source (history or cached); defined in bed_macros.xml. -->
    <expand macro="bed_loc"/>
    <param name="ip" type="integer" value="100" label="Interval padding (-ip)" help="Amount of padding (in bp) to add to each interval"/>
    <!-- Each entry renders one resource argument with its known/training/truth/prior tags. -->
    <repeat name="resource" title="-resource" help="A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)">
      <!-- Resource VCF source (history or cached); defined in vcf_macros.xml. -->
      <expand macro="vcf_loc"/>
      <param name="nameresource" label="name of the resource" type="select">
        <option value="hapmap">hapmap</option>
        <option value="omni">omni</option>
        <option value="1000G">1000G</option>
        <option value="mills">mills</option>
        <option value="dbsnp">dbsnp</option>
      </param>
      <param name="known" type="select" display="radio" label="known" help="Known - The program only uses known sites for reporting purposes (to indicate whether variants are already known or novel)">
        <option value="true">true</option>
        <option value="false">false</option>
      </param>
      <param name="training" type="select" display="radio" label="training" help="Training - The program builds the Gaussian mixture model using input variants that overlap with these training sites.">
        <option value="true">true</option>
        <option value="false">false</option>
      </param>
      <param name="truth" type="select" display="radio" label="truth" help="Truth - The program uses these truth sites to determine where to set the cutoff in VQSLOD sensitivity">
        <option value="true">true</option>
        <option value="false">false</option>
      </param>
      <param name="prior" value="10.0" min="0" max="100.0" type="float" label="prior"/>
    </repeat>
    <!-- Each entry renders one -an argument. -->
    <repeat name="an" title="-an" help="Annotation which should be used for calculations">
      <param name="an_name" label="annotation name" type="select" help="The name of the annotation which should be used for calculations">
        <option value="DP">DP</option>
        <option value="QD">QD</option>
        <option value="MQRankSum">MQRankSum</option>
        <option value="ReadPosRankSum">ReadPosRankSum</option>
        <option value="FS">FS</option>
        <option value="MQ">MQ</option>
      </param>
    </repeat>
    <!-- Each entry renders one -tranche argument. -->
    <repeat name="tranches_name" title="tranches" help="The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)">
      <param name="tranches" value="98.0" min="0" max="100.0" type="float" label="Truth sensitivity tranche (-tranche)"/>
    </repeat>
    <!-- Recalibration mode; the INDEL branch exposes the additional model options. -->
    <conditional name="mode_type">
      <param name="mode" type="select" display="radio" label="Recalibration mode (-mode)" help=" Recalibration mode to employ (SNP|INDEL)">
        <option value="SNP">snp</option>
        <option value="INDEL">INDEL</option>
      </param>
      <!-- SNP mode has no extra parameters; the branch is declared so every option has a matching when. -->
      <when value="SNP"/>
      <when value="INDEL">
        <param name="maxGaussian" type="integer" value="4" label="Max number of Gaussians for the positive model (--maxGaussians)"/>
        <param name="minNumBadVariants" type="integer" value="5000" label="Minimum number of bad variants (--minNumBadVariants)"/>
        <param name="mNG" type="integer" value="2" label="Max number of Gaussians for the negative model (-mNG)"/>
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!-- recal and tranches are written by GATK directly to the dataset paths
         (-recalFile / -tranchesFile in the command), so no from_work_dir is needed:
         the command never creates files named "recal" or "tranches" in the working directory. -->
    <data format="txt" name="recal" label="${tool.name} on ${on_string}:recal"/>
    <data format="txt" name="tranches" label="${tool.name} on ${on_string}:tranches"/>
    <!-- Receives the redirected output of the command's steps. -->
    <data format="txt" name="log" label="log"/>
  </outputs>
  <help>
.. class:: warningmark

**IMPORTANT** Before this wrapper can be used, the admin user has to download GATK version 3.8 from the Broad Institute site https://software.broadinstitute.org/gatk/download/archive and then move it into the conda_prefix folder; the path of the conda_prefix is written in the galaxy.ini (or .yml) file

		**More information** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_variantrecalibration_VariantRecalibrator.php

-----

**Implemented options** VariantRecalibrator:

**-L**                   : One or more genomic intervals over which to operate(file.bed)

**-ip**                  : Amount of padding (in bp) to add to each interval

**--resource:NAME,known=true/false,training=true/false,truth=true/false,prior=float $file**   :A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)

**-mode**   : Recalibration mode to employ (SNP|INDEL)

**-an** : annotations which should be used for calculations

**-tranche**     : The levels of truth sensitivity at which to slice the data (in percent, that is 1.0 for 1 percent)

**in case of indels mode**

**--minNumBadVariants**   : Minimum number of bad variants

**--maxGaussians**        : Max number of Gaussians for the positive model

**-mNG**                  : Max number of Gaussians for the negative model

**OUTPUTS**

-recalFile

-tranchesFile

-----

.. class:: infomark

**Recommended CoVaCS command**

**-ip** 100

**-R** genome.fa

**-input** VCF

**-resource**:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap.vcf

**-resource**:omni,known=false,training=true,truth=true,prior=12.0 omni.vcf

**-resource**:1000G,known=false,training=false,truth=false,prior=8.0 1000G.vcf

**-resource**:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp.vcf

**-mode** SNP

**-an** DP **-an** QD **-an** MQ **-an** MQRankSum **-an** ReadPosRankSum **-an** FS

**-tranche** 100.0 **-tranche** 99.5 **-tranche** 99.0 **-tranche** 98.5 **-tranche** 90.0

  </help>
  <citations>
        <citation type="doi">10.1186/s12864-018-4508-1</citation>
  </citations>
</tool>