Mercurial > repos > elixir-it > covacs_variant_recalibrator
view covacs_VariantRecalibrator.xml @ 10:2a59f20b097f draft default tip
Uploaded
author | elixir-it |
---|---|
date | Wed, 02 Oct 2019 04:31:44 -0400 |
parents | 3b3475d221a4 |
children |
line wrap: on
line source
<!--
    Galaxy wrapper around GATK 3.8 VariantRecalibrator (CoVaCS pipeline).

    The command template unpacks an admin-provided GATK archive through
    mv_untar_gatk.sh, links the inputs into the job directory and runs
    GenomeAnalysisTK.jar with the options selected in the form.
    The tool id is kept exactly as published (including its spelling) because
    histories and workflows refer to tools by id.
-->
<tool id="covacs_VarianRecalibrator" name="covacs_VariantRecalibrator" version="3.8">
    <description>GATK VariantRecalibrator wrapper Version = 3.8</description>

    <!-- bed_macros.xml / vcf_macros.xml supply the bed_loc and vcf_loc macros expanded below. -->
    <macros>
        <import>bed_macros.xml</import>
        <import>vcf_macros.xml</import>
    </macros>

    <requirements>
        <requirement type="package" version="3.8">gatk</requirement>
    </requirements>

    <command>
<![CDATA[
## Run the helper script that untars the GATK package; its output starts the log.
bash '$__tool_directory__/mv_untar_gatk.sh' &> '$log' &&

## Symlink the inputs under fixed names in the job directory.
#if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
    ln -s '$bed_source.bed_history' region.bed &&
#end if
ln -s '$input1' input1.vcf &&

## GATK tool call.
java -jar \$CONDA_PREFIX/../../GenomeAnalysisTK.jar -T VariantRecalibrator

## Optional interval restriction, from the history or from the cached table.
#if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
    -L region.bed
#end if
#if $bed_source.bed_source_selector == "cached"
    -L '$bed_source.bed_cached.fields.path'
#end if
-ip $ip

## Reference genome chosen from covacs_gatk_indexes.loc.
-R '$ref_file.fields.path'

## VCF input parameter.
-input input1.vcf

## One resource argument per entry of the "resource" repeat.
#for $r in $resource#
    #if $r.vcf_source.vcf_source_selector == "history" and $r.vcf_source.vcf_history
        --resource:${r.nameresource},known=${r.known.value},training=${r.training.value},truth=${r.truth.value},prior=${r.prior.value} '${r.vcf_source.vcf_history}'
    #end if
    #if $r.vcf_source.vcf_source_selector == "cached"
        --resource:${r.nameresource},known=${r.known.value},training=${r.training.value},truth=${r.truth.value},prior=${r.prior.value} '${r.vcf_source.vcf_cached.fields.path}'
    #end if
#end for

## Recalibration mode; the extra model options are only exposed for INDEL.
-mode $mode_type.mode
#if $mode_type.mode == "INDEL"
    --minNumBadVariants $mode_type.minNumBadVariants
    --maxGaussians $mode_type.maxGaussian
    -mNG $mode_type.mNG
#end if

## One -an argument per entry of the "an" repeat.
#for $a in $an#
    -an ${a.an_name.value}
#end for

## One -tranche argument per entry of the "tranches_name" repeat.
#for $t in $tranches_name#
    -tranche ${t.tranches.value}
#end for

## Outputs. stderr is appended so the untar output written above is not truncated.
-recalFile '$recal'
-tranchesFile '$tranches'
2>> '$log'
]]>
    </command>

    <inputs>
        <param name="ref_file" type="select" label="Using indexed reference genome" help="Select indexed genome from the list">
            <options from_data_table="covacs_gatk_indexes">
                <filter type="sort_by" column="2"/>
                <validator type="no_options" message="No indexes are available"/>
            </options>
            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>

        <!-- Required: the command always links this dataset and passes it as the GATK input. -->
        <param format="vcf" name="input1" label="VCF of raw input variants to be recalibrated" type="data"/>

        <expand macro="bed_loc"/>
        <param name="ip" type="integer" value="100" help="Amount of padding (in bp) to add to each interval"/>

        <repeat name="resource" title="-resource" help="A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)">
            <expand macro="vcf_loc"/>
            <param name="nameresource" label="name of the resource" type="select">
                <option value="hapmap">hapmap</option>
                <option value="omni">omni</option>
                <option value="1000G">1000G</option>
                <option value="mills">mills</option>
                <option value="dbsnp">dbsnp</option>
            </param>
            <param name="known" type="select" display="radio" help="Known - The program only uses known sites for reporting purposes (to indicate whether variants are already known or novel)">
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="training" type="select" display="radio" help="Training - The program builds the Gaussian mixture model using input variants that overlap with these training sites.">
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="truth" type="select" display="radio" help="Truth - The program uses these truth sites to determine where to set the cutoff in VQSLOD sensitivity">
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="prior" value="10.0" min="0" max="100.0" type="float"/>
        </repeat>

        <repeat name="an" title="-an" help="Annotation which should used for calculations">
            <param name="an_name" label="annotation name" type="select" help="The name of the annotation which should used for calculations">
                <option value="DP">DP</option>
                <option value="QD">QD</option>
                <option value="MQRankSum">MQRankSum</option>
                <option value="ReadPosRankSum">ReadPosRankSum</option>
                <option value="FS">FS</option>
                <option value="MQ">MQ</option>
            </param>
        </repeat>

        <repeat name="tranches_name" title="tranches" help="The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)">
            <param name="tranches" value="98.0" min="0" max="100.0" type="float"/>
        </repeat>

        <conditional name="mode_type">
            <param name="mode" type="select" display="radio" help=" Recalibration mode to employ (SNP|INDEL)">
                <option value="SNP">snp</option>
                <option value="INDEL">INDEL</option>
            </param>
            <!-- SNP has no extra parameters; the empty case is declared so every option has a matching when. -->
            <when value="SNP"/>
            <when value="INDEL">
                <param name="maxGaussian" type="integer" value="4"/>
                <param name="minNumBadVariants" type="integer" value="5000"/>
                <param name="mNG" type="integer" value="2"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- NOTE(review): the command writes straight to $recal and $tranches, so from_work_dir looks unused here; confirm before removing. -->
        <data format="txt" name="recal" from_work_dir="recal" label="${tool.name} on ${on_string}:recal"/>
        <data format="txt" name="tranches" from_work_dir="tranches" label="${tool.name} on ${on_string}:tranches"/>
        <data format="txt" name="log" label="log"/>
    </outputs>

    <help>
.. class:: warningmark

**IMPORTANT** to get the wrapper ready to start the admin user have to download gatk GATK version 3.8 from the broadinstitute site https://software.broadinstitute.org/gatk/download/archive
and then move it in the conda_prefix folder, the path of the conda_prefix is written in the galaxy.ini(or .yml) file

**more informations** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_variantrecalibration_VariantRecalibrator.php

-----

**Implemented options**

VariantRecalibrator:

**-L** : One or more genomic intervals over which to operate(file.bed)

**-ip** Amount of padding (in bp) to add to each interval

**--resource:NAME,known=true/false,training=true/false,truth=true/false,prior=float $file** :A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)

**-mode** : Recalibration mode to employ (SNP|INDEL)

**-an** : annotations which should used for calculations

**-tranche** The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)

**in case of indels mode**

**--minNumBadVariants** : Minimum number of bad variants

**--maxGaussians** : Max number of Gaussians for the positive model

**-mNG** : Max number of Gaussians for the negative model

**OUTPUTS**

-recalFile

-tranchesFile

-----

.. class:: infomark

**Recommended CoVaCS command**

**-ip** 100

**-R** genome.fa

**-input** VCF

**-resource**:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap.vcf

**-resource**:omni,known=false,training=true,truth=true,prior=12.0 omni.vcf

**-resource**:1000G,known=false,training=false,truth=false,prior=8.0 1000G.vcf

**-resource**:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp.vcf

**-mode** SNP

**-an** DP **-an** QD **-an** MQ **-an** MQRankSum **-an** ReadPosRankSum **-an** FS

**-tranche** 100.0 **-tranche** 99.5 **-tranche** 99.0 **-tranche** 98.5 **-tranche** 90.0
    </help>

    <citations>
        <citation type="doi">10.1186/s12864-018-4508-1</citation>
    </citations>
</tool>