comparison variant_eval.xml @ 0:b3a0923b33ed draft default tip

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 09:12:07 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b3a0923b33ed
1 <tool id="gatk_variant_eval" name="Eval Variants" version="0.0.8">
2 <description></description>
<!-- Runtime dependency: the "gatk" package, version 1.4. -->
3 <requirements>
4 <requirement type="package" version="1.4">gatk</requirement>
5 </requirements>
<!-- Imports gatk_macros.xml. NOTE(review): the macros expanded in the inputs section
     and the @CITATION_SECTION@ token in the help are presumably defined there; confirm. -->
6 <macros>
7 <import>gatk_macros.xml</import>
8 </macros>
9 <command interpreter="python">gatk_wrapper.py
## Cheetah template that builds the argument list handed to gatk_wrapper.py.
## As used below: -d carries an input dataset binding (GATK option, file, extension, link name),
## -p carries literal GATK arguments as one quoted string, -o carries a hex-encoded argument string.
## NOTE(review): these meanings are inferred from usage in this template; confirm against gatk_wrapper.py.
10 #from binascii import hexlify
11 --max_jvm_heap_fraction "1"
12 --stdout "${output_log}"
## One -d entry per file in the variants repeat, bound as eval:input_N where N is the loop index.
13 #for $var_count, $variant in enumerate( $reference_source.variants ):
14 -d "--eval:input_${var_count},%(file_type)s" "${variant.input_variant}" "${variant.input_variant.ext}" "input_variants_${var_count}"
15 #end for
## Base java invocation of the VariantEval walker, passed as a single quoted -p string.
## The -R option is only written here when the reference is not taken from the history.
16 -p 'java
17 -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar"
18 -T "VariantEval"
19 --out "${output_report}"
20 --num_threads \${GALAXY_SLOTS:-4}
21 -et "NO_ET" ##ET no phone home
22 ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
23 #if $reference_source.reference_source_selector != "history":
24 -R "${reference_source.ref_file.fields.path}"
25 #end if
26 '
27
## One comp binding per comparison ROD; when its known_names checkbox is set,
## the ROD name is additionally passed as a known_names argument.
28 #for $rod_binding in $comp_rod_bind:
29 -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}"
30 #if str( $rod_binding.comp_known_names ):
31 -p '--known_names "${rod_binding.comp_rod_name}"'
32 #end if
33 #end for
34
## Optional dbSNP binding, handled the same way as a comparison ROD.
35 #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp':
36 -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}"
37 #if str( $dbsnp_rod_bind_type.dbsnp_known_names ):
38 -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"'
39 #end if
40 #end if
41
## Shared GATK options. NOTE(review): standard_gatk_options is not defined in this file;
## presumably it is provided by gatk_macros.xml. Confirm.
42 #include source=$standard_gatk_options#
43
44
45 ##start analysis specific options
46 #if $analysis_param_type.analysis_param_type_selector == "advanced":
## Each stratification (expression plus name) is formatted into one string and
## hex-encoded with hexlify before being passed with -o.
47 #for $stratification in $analysis_param_type.stratifications:
48 #set $select_string = "--select_exps '%s' --select_names '%s'" % ( str( $stratification.select_exps ), str( $stratification.select_name ) )
49 -o '${ hexlify( $select_string ) }'
50 #end for
## The remaining advanced options are collected into one quoted -p string.
51 -p '
52
53 #for $sample in $analysis_param_type.samples:
54 --sample "${sample.sample}"
55 #end for
56
57 #if str( $analysis_param_type.stratification_modules ) != "None":
58 #for $stratification_module in str( $analysis_param_type.stratification_modules).split( ',' ):
59 --stratificationModule "${stratification_module}"
60 #end for
61 #end if
62
63 ${analysis_param_type.do_not_use_all_standard_stratifications}
64
65 #for $variant_type in $analysis_param_type.only_variants_of_type:
66 --onlyVariantsOfType "${variant_type.variant_type}"
67 #end for
68
69 #if str( $analysis_param_type.eval_modules ) != "None":
70 #for $eval_module in str( $analysis_param_type.eval_modules).split( ',' ):
71 --evalModule "${eval_module}"
72 #end for
73 #end if
74
75 ${analysis_param_type.do_not_use_all_standard_modules}
76
77 #if str( $analysis_param_type.num_samples ) != "0":
78 --numSamples "${analysis_param_type.num_samples}"
79 #end if
80
81 --minPhaseQuality "${analysis_param_type.min_phase_quality}"
82
83 #if str( $analysis_param_type.family ):
84 --family_structure "${analysis_param_type.family}"
85 #end if
86
87 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"
88
89 #if str( $analysis_param_type.ancestral_alignments ) != "None":
90 --ancestralAlignments "${analysis_param_type.ancestral_alignments}"
91 #end if
92 '
## Optional known CNV and stratification interval files are passed as -d dataset bindings,
## outside the quoted -p string.
93 #if str( $analysis_param_type.known_cnvs ) != "None":
94 -d "--knownCNVs" "${analysis_param_type.known_cnvs}" "${analysis_param_type.known_cnvs.ext}" "input_known_cnvs"
95 #end if
96
97 #if str( $analysis_param_type.strat_intervals ) != "None":
98 -d "--stratIntervals" "${analysis_param_type.strat_intervals}" "${analysis_param_type.strat_intervals.ext}" "input_strat_intervals"
99 #end if
100 #end if
101 </command>
102 <inputs>
103
<!-- Reference genome source. Both branches declare the same "variants" repeat (at least
     one VCF) and a "ref_file" parameter; only the type of ref_file differs: a select fed
     by the gatk_picard_indexes data table (cached) or a fasta dataset (history). -->
104 <conditional name="reference_source">
105 <expand macro="reference_source_selector_param" />
106 <when value="cached">
107 <repeat name="variants" title="Variant" min="1" help="-eval,--eval &amp;lt;eval&amp;gt;">
108 <param name="input_variant" type="data" format="vcf" label="Input variant file" />
109 </repeat>
110 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
111 <options from_data_table="gatk_picard_indexes">
112 <!-- <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> -->
113 </options>
114 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
115 </param>
116 </when>
117 <when value="history"> <!-- FIX ME!!!! -->
118 <repeat name="variants" title="Variant" min="1" help="-eval,--eval &amp;lt;eval&amp;gt;">
119 <param name="input_variant" type="data" format="vcf" label="Input variant file" />
120 </repeat>
121 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
122 </when>
123 </conditional>
124
<!-- Comparison VCFs (no minimum count is declared). The command template uses
     comp_rod_name as the binding name and, when comp_known_names is checked,
     also as the known_names value. -->
125 <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data" help="-comp,--comp &amp;lt;comp&amp;gt;">
126 <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" />
127 <param name="comp_rod_name" type="text" value="Unnamed" label="Comparison ROD Name"/>
128 <param name="comp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use Comparison ROD as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;"/>
129 </repeat>
130
<!-- Optional dbSNP VCF; "Set dbSNP" is the selected default. The binding name is fixed
     to "dbsnp" through a hidden parameter rather than being user-editable. -->
131 <conditional name="dbsnp_rod_bind_type">
132 <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &amp;lt;dbsnp&amp;gt;">
133 <option value="set_dbsnp" selected="True">Set dbSNP</option>
134 <option value="exclude_dbsnp">Don't set dbSNP</option>
135 </param>
136 <when value="exclude_dbsnp">
137 <!-- Do nothing here -->
138 </when>
139 <when value="set_dbsnp">
140 <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" />
141 <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="dbSNP ROD Name"/>
142 <param name="dbsnp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use dbSNP ROD as known_names" help="-knownName,--known_names &amp;lt;known_names&amp;gt;" />
143 </when>
144 </conditional>
145
<!-- Expands the gatk_param_type_conditional macro. NOTE(review): the macro body is not
     visible in this file; presumably it is defined in gatk_macros.xml. Confirm. -->
146 <expand macro="gatk_param_type_conditional" />
147
148
<!-- Tool-specific parameters passed into the analysis_type_conditional macro. The command
     template reads them only when analysis_param_type_selector is "advanced". -->
149 <expand macro="analysis_type_conditional">
150 <repeat name="stratifications" title="Stratification">
151 <param name="select_exps" value="" type="text" label="Stratification Expression" help="-select,--select_exps &amp;lt;select_exps&amp;gt;">
<!-- Single quotes are removed because the command template wraps this value in single quotes. -->
152 <sanitizer>
153 <valid initial="string.printable">
154 <remove value="&apos;"/>
155 </valid>
156 <mapping initial="none"/>
157 </sanitizer>
158 </param>
159 <param name="select_name" value="" type="text" label="Name" help="-selectName,--select_names &amp;lt;select_names&amp;gt;"/>
160 </repeat>
161
162 <repeat name="samples" title="Sample" help="-sn,--sample &amp;lt;sample&amp;gt;">
163 <param name="sample" value="" type="text" label="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context"/>
164 </repeat>
165
166 <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" help="-ST,--stratificationModule &amp;lt;stratificationModule&amp;gt;" >
167 <!-- do these need individual options also? gatk wiki has little info -->
168 <option value="AlleleFrequency" />
169 <option value="AlleleCount" />
170 <option value="CompRod" />
171 <option value="Contig" />
172 <option value="CpG" />
173 <option value="Degeneracy" />
174 <option value="EvalRod" />
175 <option value="Filter" />
176 <option value="FunctionalClass" />
177 <option value="JexlExpression" />
178 <option value="Sample" />
179 <option value="IntervalStratification" />
180 </param>
181 <param name="do_not_use_all_standard_stratifications" checked="false" type="boolean" truevalue="--doNotUseAllStandardStratifications" falsevalue="" label="Do not use the standard stratification modules by default" help="-noST,--doNotUseAllStandardStratifications" />
182
183 <repeat name="only_variants_of_type" title="only Variants Of Type" help="--onlyVariantsOfType">
184 <param name="variant_type" type="text" value="" label="only variants of these types will be considered during the evaluation"/>
185 </repeat>
186
187 <param name="eval_modules" type="select" multiple="True" display="checkboxes" label="Eval modules to apply to the eval track(s)" help="-EV,--evalModule &amp;lt;evalModule&amp;gt;" >
188 <!-- do these need individual options also? gatk wiki has little info -->
189 <option value="ACTransitionTable" />
190 <option value="AlleleFrequencyComparison" />
191 <option value="AminoAcidTransition" />
192 <option value="CompOverlap" />
193 <option value="CountVariants" />
194 <option value="GenotypeConcordance" />
195 <option value="GenotypePhasingEvaluator" />
196 <option value="IndelMetricsByAC" />
197 <option value="IndelStatistics" />
198 <option value="MendelianViolationEvaluator" />
199 <option value="PrintMissingComp" />
200 <option value="PrivatePermutations" />
201 <option value="SimpleMetricsByAC" />
202 <option value="ThetaVariantEvaluator" />
203 <option value="TiTvVariantEvaluator" />
204 <option value="VariantQualityScore" />
205 </param>
206 <param name="do_not_use_all_standard_modules" checked="false" type="boolean" truevalue="--doNotUseAllStandardModules" falsevalue="" label="Do not use the standard eval modules by default" help="-noEV,--doNotUseAllStandardModules" />
207
<!-- A num_samples value of 0 means the option is omitted from the command line. -->
208 <param name="num_samples" type="integer" label="Number of samples (used if no samples are available in the VCF file)" value="0" help="-ns,--numSamples &amp;lt;numSamples&amp;gt;"/>
209 <param name="min_phase_quality" type="float" label="Minimum phasing quality" value="10.0" help="-mpq,--minPhaseQuality &amp;lt;minPhaseQuality&amp;gt;"/>
210 <param name="family" type="text" value="" label="If provided, genotypes will be examined for Mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined" help="--family_structure"/>
211 <param name="mendelian_violation_qual_threshold" type="integer" label="Minimum genotype QUAL score for each trio member required to accept a site as a violation" value="50" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;"/>
212 <param name="ancestral_alignments" type="data" format="fasta" optional="True" label="Fasta file with ancestral alleles" help="-aa,--ancestralAlignments &amp;lt;ancestralAlignments&amp;gt;" />
213 <param name="known_cnvs" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features describing a known list of copy number variants" help="-knownCNVs,--knownCNVs &amp;lt;knownCNVs&amp;gt;" />
214 <param name="strat_intervals" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features for the IntervalStratification" help="-stratIntervals,--stratIntervals &amp;lt;stratIntervals&amp;gt;" />
215
216 </expand>
217
218
219 </inputs>
<!-- output_report is the file given to GATK as its "out" argument; output_log is the file
     given to the wrapper as its "stdout" argument (see the command template). -->
220 <outputs>
221 <data format="gatk_report" name="output_report" label="${tool.name} on ${on_string} (report)" />
222 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
223 </outputs>
<!-- Single functional test: history reference (phiX.fasta), one eval VCF, dbSNP set and
     used as known_names, no comparison RODs, basic GATK and analysis parameter sets.
     The report is compared against the expected file with no compare attribute set;
     the log is compared with compare="contains". -->
224 <tests>
225 <test>
226 <param name="reference_source_selector" value="history" />
227 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
228 <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
229 <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" />
230 <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" />
231 <param name="dbsnp_known_names" value="True"/>
232 <param name="comp_rod_bind" value="0" />
233 <param name="gatk_param_type_selector" value="basic" />
234 <param name="analysis_param_type_selector" value="basic" />
235 <output name="output_report" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.gatk_report" />
236 <output name="output_log" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.log.contains" compare="contains" />
237 </test>
238 </tests>
239 <help>
240 **What it does**
241
242 General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)
243
244 For more information on using the VariantEval module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval&gt;`_.
245
246 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
247
248 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
249
250 ------
251
252 **Inputs**
253
254 GenomeAnalysisTK: VariantEval accepts variant files as input.
255
256
257 **Outputs**
258
259 The output is a table of variant evaluation.
260
261
262 Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
263
264
265 -------
266
267 **Settings**::
268
269 out An output file presented to the walker. Will overwrite contents if file exists.
270 list List the available eval modules and exit
271 select_exps One or more stratifications to use when evaluating the data
272 select_names Names to use for the list of stratifications (must be a 1-to-1 mapping)
273 sample Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context
274 known_names Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets
275 stratificationModule One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noST is specified)
276 doNotUseAllStandardStratifications Do not use the standard stratification modules by default (instead, only those that are specified with the -ST option)
277 onlyVariantsOfType If provided, only variants of these types will be considered during the evaluation
278 evalModule One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)
279 doNotUseAllStandardModules Do not use the standard modules by default (instead, only those that are specified with the -EV option)
280 numSamples Number of samples (used if no samples are available in the VCF file)
281 minPhaseQuality Minimum phasing quality
282 family_structure If provided, genotypes will be examined for Mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined
283 mendelianViolationQualThreshold Minimum genotype QUAL score for each trio member required to accept a site as a violation
284 ancestralAlignments Fasta file with ancestral alleles
285
286 @CITATION_SECTION@
287 </help>
288 </tool>