0
|
1 <tool id="mutect2" name="MuTect2" version="3.8">
|
|
2 <description>somatic SNP and indel caller</description>
|
|
<macros>
    <!-- Shared macro definitions; the "reference_loc" macro expanded in the inputs section is expected to come from this file. -->
    <import>mutect2_macros_add_loc.xml</import>
</macros>
|
|
<requirements>
    <!-- Tools invoked by the command template: GATK (MuTect2), Picard (sequence dictionary) and samtools (indexing). -->
    <requirement type="package" version="3.8">gatk</requirement>
    <requirement type="package" version="2.7.1">picard</requirement>
    <requirement type="package" version="1.7">samtools</requirement>
</requirements>
|
|
<command>
<![CDATA[
    ## Link the BAM inputs under fixed names with a .bam extension (MuTect2
    ## relies on the file extension), then index the links so the .bai files
    ## are written into the job working directory instead of next to the
    ## Galaxy datasets. The -@ option allocates additional threads.
    ln -s '$input1' tumor.bam &&
    ln -s '$input2' normal.bam &&
    samtools index -@ \${GALAXY_SLOTS:-4} tumor.bam &&
    samtools index -@ \${GALAXY_SLOTS:-4} normal.bam &&

    ## A reference taken from the history has no .fai/.dict companions, so
    ## both are built here; MuTect2 requires them next to the FASTA.
    #if $reference_source.reference_source_selector == "history"
        ln -s '$reference_source.ref_file_h' genome.fa &&
        samtools faidx genome.fa &&
        java -jar \$CONDA_DEFAULT_ENV/share/picard-2.7.1-2/picard.jar CreateSequenceDictionary R=genome.fa O=genome.dict 2>> '$log' &&
    #end if

    ## Optional inputs are linked under names carrying the expected extension.
    ## Every link ends with && so the next command is never swallowed as an
    ## extra argument of ln.
    #if $list
        ln -s '$list' position.bed &&
    #end if
    #if $dbSNP
        ln -s '$dbSNP' dbSNP.vcf &&
    #end if
    #if $cosmic
        ln -s '$cosmic' cosmic.vcf &&
    #end if
    #if $alleles
        ln -s '$alleles' alleles.vcf &&
    #end if

    ## gatk3-register takes GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2, unpacks it
    ## and moves the .jar file to \$CONDA_DEFAULT_ENV/opt/gatk-3.8/.
    ## A failing registration is tolerated, as it was with the original ';'
    ## separator (presumably because the jar may already be registered; confirm),
    ## but the step stays inside the && chain so that a failure of any
    ## earlier step still aborts the job.
    ( gatk3-register \$_CONDA_DIR/../GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 2>> '$log' || true ) &&

    java -jar \$CONDA_DEFAULT_ENV/opt/gatk-3.8/GenomeAnalysisTK.jar -nct \${GALAXY_SLOTS:-4} -T MuTect2 -I:tumor tumor.bam -I:normal normal.bam -o '$output'
    #if $reference_source.reference_source_selector == "history"
        -R genome.fa
    #end if
    #if $reference_source.reference_source_selector == "cached"
        -R '$reference_source.ref_file.fields.path'
    #end if

    ## Optional inputs: the option is added only when the dataset was supplied.
    #if $dbSNP
        --dbsnp dbSNP.vcf
    #end if
    #if $cosmic
        --cosmic cosmic.vcf
    #end if
    #if $list
        -L position.bed
    #end if
    #if $alleles
        --alleles alleles.vcf
    #end if

    ## Advanced options: an option is added only when its value differs from
    ## the default. All of these parameters live inside the "advanced"
    ## conditional and must be referenced through it.
    #if str($advanced.advanced_parameters) == "show"
        #if $advanced.heterozygosity != "0.001"
            --heterozygosity $advanced.heterozygosity
        #end if
        #if $advanced.heterozygosity_stdev != "0.01"
            --heterozygosity_stdev $advanced.heterozygosity_stdev
        #end if
        #if $advanced.indel_heterozygosity != "1.25E-4"
            --indel_heterozygosity $advanced.indel_heterozygosity
        #end if
        #if $advanced.initial_normal_lod != "0.5"
            --initial_normal_lod $advanced.initial_normal_lod
        #end if
        #if $advanced.initial_tumor_lod != "4.0"
            --initial_tumor_lod $advanced.initial_tumor_lod
        #end if
        #if $advanced.max_alt_allele_in_normal_fraction != "0.03"
            --max_alt_allele_in_normal_fraction $advanced.max_alt_allele_in_normal_fraction
        #end if
        #if $advanced.max_alt_alleles_in_normal_count != "1"
            --max_alt_alleles_in_normal_count $advanced.max_alt_alleles_in_normal_count
        #end if
        #if $advanced.max_alt_alleles_in_normal_qscore_sum != "20"
            --max_alt_alleles_in_normal_qscore_sum $advanced.max_alt_alleles_in_normal_qscore_sum
        #end if
        #if $advanced.maxReadsInRegionPerSample != "1000"
            --maxReadsInRegionPerSample $advanced.maxReadsInRegionPerSample
        #end if
        #if $advanced.min_base_quality_score != "10"
            --min_base_quality_score $advanced.min_base_quality_score
        #end if
        #if $advanced.minReadsPerAlignmentStart != "5"
            --minReadsPerAlignmentStart $advanced.minReadsPerAlignmentStart
        #end if
        #if $advanced.normal_lod != "2.2"
            --normal_lod $advanced.normal_lod
        #end if
        #if $advanced.pir_mad_threshold != "3.0"
            --pir_mad_threshold $advanced.pir_mad_threshold
        #end if
        #if $advanced.pir_median_threshold != "10.0"
            --pir_median_threshold $advanced.pir_median_threshold
        #end if
        #if $advanced.power_constant_qscore != "30"
            --power_constant_qscore $advanced.power_constant_qscore
        #end if
        #if $advanced.sample_ploidy != "2"
            --sample_ploidy $advanced.sample_ploidy
        #end if
        #if $advanced.standard_min_confidence_threshold_for_calling != "10.0"
            --standard_min_confidence_threshold_for_calling $advanced.standard_min_confidence_threshold_for_calling
        #end if
        #if $advanced.tumor_lod != "6.3"
            --tumor_lod $advanced.tumor_lod
        #end if
        #if $advanced.contamination_fraction_to_filter != "0.0"
            --contamination_fraction_to_filter $advanced.contamination_fraction_to_filter
        #end if
        #if $advanced.dbsnp_normal_lod != "5.5"
            --dbsnp_normal_lod $advanced.dbsnp_normal_lod
        #end if
        #if $advanced.debug_read_name != ""
            --debug_read_name $advanced.debug_read_name
        #end if
        #if $advanced.genotyping_mode != "DISCOVERY"
            --genotyping_mode $advanced.genotyping_mode
        #end if
        #if $advanced.group
            --group $advanced.group
        #end if
    #end if

    ## Optional outputs: added when the matching selector is set to "yes".
    #if str($optional_out1.outFile1) == "yes"
        --activeRegionOut '$activeRegionOut_output'
    #end if
    #if str($optional_out2.outFile2) == "yes"
        --activityProfileOut '$activityProfileOut_output'
    #end if
    #if str($optional_out3.outFile3) == "yes"
        --graphOutput '$graphOutput_output'
    #end if
    #if str($optional_out4.outFile4) == "yes"
        --bamOutput '$bamOutput_output'
    #end if

    ## Standard error is appended to the log dataset so that the messages of
    ## the earlier steps (Picard, gatk3-register) are kept as well.
    2>> '$log'
]]></command>
|
|
<inputs>
    <!-- Reference genome selection, expanded from the imported macros file. -->
    <expand macro="reference_loc"/>

    <!-- Required tumor/normal BAM pair. -->
    <param format="bam" name="input1" type="data" label="tumor bam" help="bamfile"/>
    <param format="bam" name="input2" type="data" label="normal bam" help="bamfile"/>

    <!-- Optional resources; each one is passed to MuTect2 only when a dataset is selected. -->
    <param format="vcf" name="dbSNP" type="data" optional="true" label="dbsnp file.vcf" help="vcf file"/>
    <param format="vcf" name="cosmic" type="data" optional="true" label="cosmic file.vcf" help="vcf file"/>
    <param format="bed" name="list" type="data" optional="true" label="position list" help="bed file"/>
    <param format="vcf" name="alleles" type="data" optional="true" label="set of alleles use in genotyping" help="vcf file"/>

    <!-- Advanced numeric/text options; the values shown are the defaults the command template compares against. -->
    <conditional name="advanced">
        <param name="advanced_parameters" type="select" label="advanced_parameters">
            <option value="hide" selected="true">Hide</option>
            <option value="show">Show</option>
        </param>
        <when value="hide"/>
        <when value="show">
            <param name="heterozygosity" type="float" optional="true" value="0.001" help="Heterozygosity value used to compute prior likelihoods for any locus" />
            <param name="heterozygosity_stdev" type="float" optional="true" value="0.01" help="Standard deviation of heterozygosity for SNP and indel calling"/>
            <param name="indel_heterozygosity" type="text" value="1.25E-4" optional="true" help="Heterozygosity for indel calling" />
            <param name="initial_normal_lod" type="float" optional="true" value="0.5" help="Initial LOD threshold for calling normal variant" />
            <param name="initial_tumor_lod" type="float" optional="true" value="4.0" help="Initial LOD threshold for calling tumor variant" />
            <param name="max_alt_allele_in_normal_fraction" type="float" optional="true" value="0.03" help="Threshold for maximum alternate allele fraction in normal" />
            <param name="max_alt_alleles_in_normal_count" type="text" optional="true" value="1" help="Threshold for maximum alternate allele counts in normal" />
            <param name="max_alt_alleles_in_normal_qscore_sum" type="text" optional="true" value="20" help="Threshold for maximum alternate allele quality score sum in normal" />
            <param name="maxReadsInRegionPerSample" type="text" optional="true" value="1000" help="Maximum reads in an active region" />
            <param name="min_base_quality_score" type="text" size="2" optional="true" value="10" help="Minimum base quality required to consider a base for calling" />
            <param name="minReadsPerAlignmentStart" type="text" optional="true" value="5" help="Minimum number of reads sharing the same alignment start for each genomic location in an active region" />
            <param name="normal_lod" type="float" optional="true" value="2.2" help="LOD threshold for calling normal non-germline" />
            <param name="pir_mad_threshold" type="float" optional="true" value="3.0" help="threshold for clustered read position artifact MAD" />
            <param name="pir_median_threshold" type="float" optional="true" value="10.0" help="threshold for clustered read position artifact median" />
            <param name="power_constant_qscore" type="text" optional="true" value="30" help="Phred scale quality score constant to use in power calculations" />
            <param name="sample_ploidy" type="text" optional="true" value="2" help="ploidy per sample" />
            <param name="standard_min_confidence_threshold_for_calling" type="float" optional="true" value="10.0" help="The minimum phred-scaled confidence threshold at which variants should be called" />
            <param name="tumor_lod" type="float" optional="true" value="6.3" help="LOD threshold for calling tumor variant" />
            <param name="contamination_fraction_to_filter" type="float" optional="true" value="0.0" help="Fraction of contamination to aggressively remove" />
            <param name="dbsnp_normal_lod" type="float" optional="true" value="5.5" help="LOD threshold for calling normal non-variant at dbsnp sites" />
            <param name="debug_read_name" type="text" optional="true" value="" help="trace this read name through the calling process" />
            <param name="genotyping_mode" type="select" optional="true" help="Specifies how to determine the alternate alleles to use for genotyping" >
                <option value="DISCOVERY" selected="true">DISCOVERY</option>
                <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
            </param>
            <param name="group" type="text" optional="true" help="one or more classes, groups of annotation to apply to variant call" />
        </when>
    </conditional>

    <!-- One yes/no selector per optional output; the matching output dataset is filtered on the same value. -->
    <conditional name="optional_out1">
        <param name="outFile1" type="select" label="activeRegionOut">
            <option value="no" selected="true">no</option>
            <option value="yes">yes</option>
        </param>
        <when value="no"/>
        <when value="yes"/>
    </conditional>
    <conditional name="optional_out2">
        <param name="outFile2" type="select" label="activityprofileOut">
            <option value="no" selected="true">no</option>
            <option value="yes">yes</option>
        </param>
        <when value="no"/>
        <when value="yes"/>
    </conditional>
    <conditional name="optional_out3">
        <param name="outFile3" type="select" label="graphOutput">
            <option value="no" selected="true">no</option>
            <option value="yes">yes</option>
        </param>
        <when value="no"/>
        <when value="yes"/>
    </conditional>
    <conditional name="optional_out4">
        <param name="outFile4" type="select" label="Bamoutput">
            <option value="no" selected="true">no</option>
            <option value="yes">yes</option>
        </param>
        <when value="no"/>
        <when value="yes"/>
    </conditional>
</inputs>
|
|
<outputs>
    <!-- Always produced: the variant calls and the captured standard error. -->
    <data name="output" format="vcf" label="${tool.name} on ${on_string}"/>
    <data name="log" format="txt" label="${tool.name} on ${on_string} :log"/>

    <!-- Produced only when the matching selector in the inputs section is set to "yes". -->
    <data name="activeRegionOut_output" format="txt" optional="true" label="${tool.name} on ${on_string} :activeRegionOut">
        <filter>optional_out1['outFile1'] == 'yes'</filter>
    </data>
    <data name="activityProfileOut_output" format="txt" label="${tool.name} on ${on_string} :activityProfileOut">
        <filter>optional_out2['outFile2'] == 'yes'</filter>
    </data>
    <data name="graphOutput_output" format="txt" label="${tool.name} on ${on_string} :graphOutput">
        <filter>optional_out3['outFile3'] == 'yes'</filter>
    </data>
    <!-- NOTE(review): this dataset is declared as txt although it receives the bamOutput file; confirm the format GATK actually writes. -->
    <data name="bamOutput_output" format="txt" label="${tool.name} on ${on_string} :bamOutput">
        <filter>optional_out4['outFile4'] == 'yes'</filter>
    </data>
</outputs>
|
|
<tests>
    <!-- Tumor/normal run against a reference FASTA taken from the history. -->
    <test>
        <conditional name="reference_source">
            <param name="reference_source_selector" value="history"/>
            <!-- The command template reads the history reference from "ref_file_h". -->
            <param name="ref_file_h" value="test_fasta.fa"/>
        </conditional>
        <param name="input1" value="mutect2_test_tumoral2.bam" />
        <param name="input2" value="mutect2_test_normal2.bam" />
        <!-- Minimal expectation: the main output must be a VCF with a column header line. -->
        <output name="output">
            <assert_contents>
                <has_text text="#CHROM"/>
            </assert_contents>
        </output>
    </test>
</tests>
|
|
<help><![CDATA[
**IMPORTANT**: before this wrapper can be used, a Galaxy administrator has to download GATK 3.8-0-ge9d806836 from the Broad Institute archive (https://software.broadinstitute.org/gatk/download/archive) and move the downloaded archive into the conda_prefix folder.
The path of the conda_prefix folder is set in the galaxy.ini (or galaxy.yml) file.

MuTect2 is a somatic SNP and indel caller that combines the DREAM challenge-winning somatic genotyping engine of the original MuTect (Cibulskis et al., 2013) with the assembly-based machinery of HaplotypeCaller.
The Galaxy wrapper for MuTect2 implements most but not all options available through the command line. Supported options are described below.

**Optional Inputs**

+ --alleles none Set of alleles to use in genotyping
+ --cosmic [] VCF file of COSMIC sites
+ --dbsnp none dbSNP file
+ -L none BED file of positions to restrict the analysis to
+ --activityProfileOut NA Output the raw activity profile results in IGV format
+ --graphOutput NA Write debug assembly graph information to this file

**Optional Parameters**

+ --contamination_fraction_to_filter 0.0 Fraction of contamination to aggressively remove
+ --dbsnp_normal_lod 5.5 LOD threshold for calling normal non-variant at dbsnp sites
+ --debug_read_name NA Trace this read name through the calling process
+ --genotyping_mode DISCOVERY Specifies how to determine the alternate alleles to use for genotyping
+ --group [] One or more classes/groups of annotations to apply to variant calls
+ --heterozygosity 0.001 Heterozygosity value used to compute prior likelihoods for any locus
+ --heterozygosity_stdev 0.01 Standard deviation of heterozygosity for SNP and indel calling
+ --indel_heterozygosity 1.25E-4 Heterozygosity for indel calling
+ --initial_normal_lod 0.5 Initial LOD threshold for calling normal variant
+ --initial_tumor_lod 4.0 Initial LOD threshold for calling tumor variant
+ --max_alt_allele_in_normal_fraction 0.03 Threshold for maximum alternate allele fraction in normal
+ --max_alt_alleles_in_normal_count 1 Threshold for maximum alternate allele counts in normal
+ --max_alt_alleles_in_normal_qscore_sum 20 Threshold for maximum alternate allele quality score sum in normal
+ --maxReadsInRegionPerSample 1000 Maximum reads in an active region
+ --min_base_quality_score 10 Minimum base quality required to consider a base for calling
+ --minReadsPerAlignmentStart 5 Minimum number of reads sharing the same alignment start for each genomic location in an active region
+ --normal_lod 2.2 LOD threshold for calling normal non-germline
+ --pir_mad_threshold 3.0 Threshold for clustered read position artifact MAD
+ --pir_median_threshold 10.0 Threshold for clustered read position artifact median
+ --power_constant_qscore 30 Phred scale quality score constant to use in power calculations
+ --sample_ploidy 2 Ploidy per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).
+ --standard_min_confidence_threshold_for_calling 10.0 The minimum phred-scaled confidence threshold at which variants should be called
+ --tumor_lod 6.3 LOD threshold for calling tumor variant

**Advanced Outputs**

+ --bamOutput
+ --activeRegionOut
+ --activityProfileOut
+ --graphOutput

More information at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_cancer_m2_MuTect2.php
]]></help>
|
|
<citations>
    <!-- DOI of the publication cited for this tool. -->
    <citation type="doi">10.1038/nbt.2514</citation>
</citations>
|
|
313 </tool>
|