<tool id="covacs_VarianRecalibrator" name="covacs_VariantRecalibrator" version="3.8">
    <!--
        Galaxy wrapper around GATK 3.8 VariantRecalibrator (CoVaCS pipeline).
        The tool id keeps its historical spelling ("VarianRecalibrator") on purpose:
        existing workflows reference the tool by id, so it is left unchanged.
    -->
    <description>GATK VariantRecalibrator wrapper Version = 3.8</description>
    <macros>
        <import>bed_macros.xml</import>
        <import>vcf_macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="3.8">gatk</requirement>
    </requirements>
    <!-- detect_errors="exit_code": stderr is redirected into the log dataset below, so the
         exit status is the only reliable failure signal for this job. -->
    <command detect_errors="exit_code"><![CDATA[
        ## Call the helper script that untars the GATK package; its output starts the log.
        bash '$__tool_directory__/mv_untar_gatk.sh' &> '$log' &&

        ## Symlink the inputs under fixed names for the GATK call.
        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
            ln -s '$bed_source.bed_history' region.bed &&
        #end if

        ln -s '$input1' input1.vcf &&

        ## GATK tool call
        java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar" -T VariantRecalibrator

        ## Optional interval restriction: BED from the history or from the cached list.
        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
            -L region.bed
        #end if
        #if $bed_source.bed_source_selector == "cached"
            -L '$bed_source.bed_cached.fields.path'
        #end if

        -ip $ip

        ## Reference genome chosen from covacs_gatk_indexes.loc
        -R '$ref_file.fields.path'

        ## VCF input parameter
        -input input1.vcf

        ## One resource argument per entry of the "resource" repeat.
        #for $r in $resource
            #if $r.vcf_source.vcf_source_selector == "history" and $r.vcf_source.vcf_history
                --resource:${r.nameresource},known=${r.known.value},training=${r.training.value},truth=${r.truth.value},prior=${r.prior.value} '${r.vcf_source.vcf_history}'
            #end if
            #if $r.vcf_source.vcf_source_selector == "cached"
                --resource:${r.nameresource},known=${r.known.value},training=${r.training.value},truth=${r.truth.value},prior=${r.prior.value} '${r.vcf_source.vcf_cached.fields.path}'
            #end if
        #end for

        -mode $mode_type.mode

        ## The extra model parameters are only exposed (and passed) in INDEL mode.
        #if $mode_type.mode == "INDEL"
            --minNumBadVariants $mode_type.minNumBadVariants
            --maxGaussians $mode_type.maxGaussian
            -mNG $mode_type.mNG
        #end if

        ## One -an argument per entry of the "an" repeat.
        #for $a in $an
            -an ${a.an_name.value}
        #end for

        ## One -tranche argument per entry of the "tranches_name" repeat.
        #for $t in $tranches_name
            -tranche ${t.tranches.value}
        #end for

        ## Outputs are written directly to the Galaxy dataset paths.
        -recalFile '$recal'
        -tranchesFile '$tranches'

        ## Append (not truncate) so the output of the untar step above is kept in the log.
        2>> '$log'
    ]]></command>
    <inputs>
        <param name="ref_file" type="select" label="Using indexed reference genome" help="Select indexed genome from the list">
            <options from_data_table="covacs_gatk_indexes">
                <filter type="sort_by" column="2" />
                <validator type="no_options" message="No indexes are available" />
            </options>
            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
        <!-- Required: the command always symlinks this dataset and passes it as -input. -->
        <param format="vcf" name="input1" label="VCF of raw input variants to be recalibrated" type="data" optional="false" />
        <expand macro="bed_loc"/>
        <param name="ip" type="integer" value="100" help="Amount of padding (in bp) to add to each interval"/>
        <repeat name="resource" title="-resource" help="A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)">
            <expand macro="vcf_loc"/>
            <param name="nameresource" label="name of the resource" type="select" >
                <option value="hapmap">hapmap</option>
                <option value="omni">omni</option>
                <option value="1000G">1000G</option>
                <option value="mills">mills</option>
                <option value="dbsnp">dbsnp</option>
            </param>
            <param name="known" type="select" display="radio" help="Known - The program only uses known sites for reporting purposes (to indicate whether variants are already known or novel)" >
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="training" type="select" display="radio" help="Training - The program builds the Gaussian mixture model using input variants that overlap with these training sites.">
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="truth" type="select" display="radio" help="Truth - The program uses these truth sites to determine where to set the cutoff in VQSLOD sensitivity">
                <option value="true">true</option>
                <option value="false">false</option>
            </param>
            <param name="prior" value="10.0" min="0" max="100.0" type="float"/>
        </repeat>
        <repeat name="an" title="-an" help="Annotation which should used for calculations">
            <param name="an_name" label="annotation name" type="select" help="The name of the annotation which should used for calculations">
                <option value="DP">DP</option>
                <option value="QD">QD</option>
                <option value="MQRankSum">MQRankSum</option>
                <option value="ReadPosRankSum">ReadPosRankSum</option>
                <option value="FS">FS</option>
                <option value="MQ">MQ</option>
            </param>
        </repeat>
        <repeat name="tranches_name" title="tranches" help="The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)">
            <param name="tranches" value="98.0" min="0" max="100.0" type="float"/>
        </repeat>
        <conditional name="mode_type">
            <param name="mode" type="select" display="radio" help=" Recalibration mode to employ (SNP|INDEL)">
                <option value="SNP">snp</option>
                <option value="INDEL">INDEL</option>
            </param>
            <!-- SNP mode has no extra parameters; the empty branch makes that explicit. -->
            <when value="SNP"/>
            <when value="INDEL">
                <param name="maxGaussian" type="integer" value="4"/>
                <param name="minNumBadVariants" type="integer" value="5000"/>
                <param name="mNG" type="integer" value="2"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- No from_work_dir here: the command passes the dataset paths straight to
             -recalFile / -tranchesFile, so no "recal" or "tranches" file is ever created
             in the working directory. -->
        <data format="txt" name="recal" label="${tool.name} on ${on_string}:recal"/>
        <data format="txt" name="tranches" label="${tool.name} on ${on_string}:tranches"/>
        <data format="txt" name="log" label="log"/>
    </outputs>
    <help>
.. class:: warningmark

**IMPORTANT** to get the wrapper ready to start the admin user have to download gatk GATK version 3.8 from the broadinstitute site https://software.broadinstitute.org/gatk/download/archive and then move it in the conda_prefix folder, the path of the conda_prefix is written in the galaxy.ini(or .yml) file

**more informations** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_variantrecalibration_VariantRecalibrator.php

-----

**Implemented options** VariantRecalibrator:

**-L** : One or more genomic intervals over which to operate(file.bed)

**-ip** Amount of padding (in bp) to add to each interval

**--resource:NAME,known=true/false,training=true/false,truth=true/false,prior=float $file** :A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)

**-mode** : Recalibration mode to employ (SNP|INDEL)

**-an** : annotations which should used for calculations

**-tranche** The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)

**in case of indels mode**

**--minNumBadVariants** : Minimum number of bad variants

**--maxGaussians** : Max number of Gaussians for the positive model

**-mNG** : Max number of Gaussians for the negative model

**OUTPUTS**

-recalFile

-tranchesFile

-----

.. class:: infomark

**Recommended CoVaCS command**

**-ip** 100

**-R** genome.fa

**-input** VCF

**-resource**:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap.vcf

**-resource**:omni,known=false,training=true,truth=true,prior=12.0 omni.vcf

**-resource**:1000G,known=false,training=false,truth=false,prior=8.0 1000G.vcf

**-resource**:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp.vcf

**-mode** SNP

**-an** DP **-an** QD **-an** MQ **-an** MQRankSum **-an** ReadPosRankSum **-an** FS

**-tranche** 100.0 **-tranche** 99.5 **-tranche** 99.0 **-tranche** 98.5 **-tranche** 90.0

    </help>
    <citations>
        <citation type="doi">10.1186/s12864-018-4508-1</citation>
    </citations>
</tool>