comparison freebayes-d291dc763c4c/freebayes.xml @ 0:9a7e8a919c78 draft

Uploaded
author fubar
date Sat, 28 Sep 2013 05:57:27 -0400
parents
children 5022cd74093c
comparison
equal deleted inserted replaced
-1:000000000000 0:9a7e8a919c78
1 <?xml version="1.0"?>
2 <tool id="freebayes" name="FreeBayes" version="0.9.9">
3 <stdio> <!-- Galaxy applies only the rules listed here: a non-zero exit status fails the job, any other stdout/stderr text is surfaced as a warning. -->
4 <exit_code range="1:" level="fatal" description="freebayes exited with a non-zero status"/> <regex match=".*" source="both" level="warning" description="freebayes said:"/>
5 </stdio>
6 <requirements> <!-- External packages used by the command below: the freebayes binary and samtools (for faidx). -->
7 <requirement version="0.9.9_c993c5c07e7673" type="package">freebayes</requirement>
8 <requirement version="0.1.18" type="package">samtools</requirement>
9 </requirements>
10 <description> - Bayesian genetic variant detector</description>
11 <command>
12 ##set up input files
13 #set $reference_fasta_filename = "localref.fa"
14 #if str( $reference_source.reference_source_selector ) == "history":
15 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
16 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for FreeBayes" &gt;&amp;2 &amp;&amp;
17 #else:
18 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
19 #end if
20 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
21 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
22 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
23 #end for
24 ##finished setting up inputs
25
26 ##start FreeBayes commandline
27 freebayes
28 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
29 --bam "localbam_${bam_count}.bam"
30 #end for
31 --fasta-reference "${reference_fasta_filename}"
32
33 ##outputs
34 --vcf "${output_vcf}"
35
36 ##advanced options
37 #if str( $options_type.options_type_selector ) == "advanced":
38 ##additional outputs
39 #if $options_type.output_trace_option:
40 --trace "${output_trace}"
41 #end if
42 #if $options_type.output_failed_alleles_option:
43 --failed-alleles "${output_failed_alleles_bed}"
44 #end if
45
46 ##additional inputs
47 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
48 --targets "${options_type.target_limit_type.input_target_bed}"
49 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
50 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
51 #end if
52 #if $options_type.input_sample_file:
53 --samples "${options_type.input_sample_file}"
54 #end if
55 #if $options_type.input_populations_file:
56 --populations "${options_type.input_populations_file}"
57 #end if
58 #if $options_type.input_cnv_map_bed:
59 --cnv-map "${options_type.input_cnv_map_bed}"
60 #end if
61 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
62 --variant-input "${options_type.input_variant_type.input_variant_vcf}"
63 ${options_type.input_variant_type.only_use_input_alleles}
64 #end if
65 #if $options_type.haplotype_basis_alleles:
66 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}"
67 #end if
68
69
70 ##reporting
71 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
72 --pvar "${options_type.section_reporting_type.pvar}"
73 ${options_type.section_reporting_type.show_reference_repeats}
74 #end if
75
76 ##population model
77 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
78 --theta "${options_type.section_population_model_type.theta}"
79 --ploidy "${options_type.section_population_model_type.ploidy}"
80 ${options_type.section_population_model_type.pooled}
81 #end if
82
83 ##reference allele
84 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
85 --use-reference-allele
86 ${options_type.use_reference_allele_type.diploid_reference}
87 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
88 #end if
89
90 ##allele scope (the optional number is tested by its text form so that an explicit 0 is still passed to freebayes)
91 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
92 ${options_type.section_allele_scope_type.no_snps}
93 ${options_type.section_allele_scope_type.no_indels}
94 ${options_type.section_allele_scope_type.no_mnps}
95 ${options_type.section_allele_scope_type.no_complex}
96 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
97 #if str( $options_type.section_allele_scope_type.max_complex_gap ) not in ( "", "None" ):
98 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
99 #end if
100 #end if
101
102 ##indel realignment
103 ${options_type.left_align_indels}
104
105 ##input filters (optional limits are tested by their text form so that an explicit 0 is still passed to freebayes)
106 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
107 ${options_type.section_input_filters_type.use_duplicate_reads}
108 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters":
109 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}"
110 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}"
111 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}"
112 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters":
113 --standard-filters
114 #end if
115 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
116 #if str( $options_type.section_input_filters_type.read_mismatch_limit ) not in ( "", "None" ):
117 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
118 #end if
119 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
120 #if str( $options_type.section_input_filters_type.read_snp_limit ) not in ( "", "None" ):
121 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
122 #end if
123 #if str( $options_type.section_input_filters_type.read_indel_limit ) not in ( "", "None" ):
124 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
125 #end if
126 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
127 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
128 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
129 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
130 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
131 --min-coverage "${options_type.section_input_filters_type.min_coverage}"
132 #end if
133
134 ##bayesian priors
135 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
136 ${options_type.section_bayesian_priors_type.no_ewens_priors}
137 ${options_type.section_bayesian_priors_type.no_population_priors}
138 ${options_type.section_bayesian_priors_type.hwe_priors}
139 #end if
140
141 ##observation prior expectations
142 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
143 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
144 ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
145 #end if
146
147 ##algorithmic features (the optional threshold is tested by its text form so that an explicit 0 is still passed to freebayes)
148 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
149 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
150 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
151 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
152 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
153 ${options_type.section_algorithmic_features_type.no_permute}
154 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
155 #if str( $options_type.section_algorithmic_features_type.genotype_variant_threshold ) not in ( "", "None" ):
156 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
157 #end if
158 ${options_type.section_algorithmic_features_type.use_mapping_quality}
159 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
160 ${options_type.section_algorithmic_features_type.no_marginals}
161 #end if
162
163 #end if
164 </command>
165 <inputs>
166 <conditional name="reference_source">
167 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
168 <option value="cached">Locally cached</option>
169 <option value="history">History</option>
170 </param>
171 <when value="cached">
172 <repeat name="input_bams" title="Sample BAM file" min="1">
173 <param name="input_bam" type="data" format="bam" label="BAM file">
174 <validator type="unspecified_build" />
175 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." />
176 </param>
177 </repeat>
178 <param name="ref_file" type="select" label="Using reference genome">
179 <options from_data_table="sam_fa_indexes">
180 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
181 </options>
182 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
183 </param>
184 </when>
185 <when value="history"> <!-- FIX ME!!!! -->
186 <repeat name="input_bams" title="Sample BAM file" min="1">
187 <param name="input_bam" type="data" format="bam" label="BAM file" />
188 </repeat>
189 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
190 </when>
191 </conditional>
192
193 <conditional name="options_type">
194 <param name="options_type_selector" type="select" label="Basic or Advanced options">
195 <option value="basic" selected="True">Basic</option>
196 <option value="advanced">Advanced</option>
197 </param>
198 <when value="basic">
199 <!-- Do nothing here -->
200 </when>
201 <when value="advanced">
202
203 <!-- output -->
204 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" />
205 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" />
206
207
208 <!-- input -->
209 <conditional name="target_limit_type">
210 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets">
211 <option value="do_not_limit" selected="True">Do not limit</option>
212 <option value="limit_by_target_file">Limit by target file</option>
213 <option value="limit_by_region">Limit to region</option>
214 </param>
215 <when value="do_not_limit">
216 <!-- Do nothing here -->
217 </when>
218 <when value="limit_by_target_file">
219 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." />
220 </when>
221 <when value="limit_by_region">
222 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? -->
223 <param name="region_start" type="integer" label="Region Start" value="" />
224 <param name="region_end" type="integer" label="Region End" value="" />
225 </when>
226 </conditional>
227 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" />
228 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" />
229 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" />
230 <conditional name="input_variant_type">
231 <param name="input_variant_type_selector" type="select" label="Provide variants file">
232 <option value="do_not_provide" selected="True">Do not provide</option>
233 <option value="provide_vcf">Provide VCF file</option>
234 </param>
235 <when value="do_not_provide">
236 <!-- Do nothing here -->
237 </when>
238 <when value="provide_vcf">
239 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" />
240 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
241 </when>
242 </conditional>
243 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" />
244
245 <!-- reporting -->
246 <conditional name="section_reporting_type">
247 <param name="section_reporting_type_selector" type="select" label="Set Reporting options">
248 <option value="do_not_set" selected="True">Do not set</option>
249 <option value="set">Set</option>
250 </param>
251 <when value="do_not_set">
252 <!-- do nothing here -->
253 </when>
254 <when value="set">
255 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" />
256 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
257 </when>
258 </conditional>
259
260
261 <!-- population model -->
262 <conditional name="section_population_model_type">
263 <param name="section_population_model_type_selector" type="select" label="Set population model options">
264 <option value="do_not_set" selected="True">Do not set</option>
265 <option value="set">Set</option>
266 </param>
267 <when value="do_not_set">
268 <!-- do nothing here -->
269 </when>
270 <when value="set">
271 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
272 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
273 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
274 </when>
275 </conditional>
276
277 <!-- reference allele -->
278 <conditional name="use_reference_allele_type">
279 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
280 <option value="do_not_include_reference_allele" selected="True">Do not include</option>
281 <option value="include_reference_allele">Include</option>
282 </param>
283 <when value="do_not_include_reference_allele">
284 <!-- Do nothing here -->
285 </when>
286 <when value="include_reference_allele">
287 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
288 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
289 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
290 </when>
291 </conditional>
292
293 <!-- allele scope -->
294 <conditional name="section_allele_scope_type">
295 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
296 <option value="do_not_set" selected="True">Do not set</option>
297 <option value="set">Set</option>
298 </param>
299 <when value="do_not_set">
300 <!-- do nothing here -->
301 </when>
302 <when value="set">
303 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
304 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
305 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" />
306 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
307 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
308 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
309 </when>
310 </conditional>
311
312 <!-- indel realignment -->
313 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />
314
315 <!-- input filters -->
316 <conditional name="section_input_filters_type">
317 <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
318 <option value="do_not_set" selected="True">Do not set</option>
319 <option value="set">Set</option>
320 </param>
321 <when value="do_not_set">
322 <!-- do nothing here -->
323 </when>
324 <when value="set">
325 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
326 <conditional name="quality_filter_type">
327 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters">
328 <option value="standard_filters" selected="True">Apply standard</option>
329 <option value="apply_filters">Apply specified</option>
330 </param>
331 <when value="standard_filters">
332 <!-- Do nothing here --> <!-- standard-filters -->
333 </when>
334 <when value="apply_filters">
335 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" />
336 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="0" />
337 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
338 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
339 </when>
340 </conditional>
341 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is &gt;=" value="10" />
342 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
343 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="1.0" />
344 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
345 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
346 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
347 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
348 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
349 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
350 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
351 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
352 </when>
353 </conditional>
354
355
356 <!-- bayesian priors -->
357 <conditional name="section_bayesian_priors_type">
358 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
359 <option value="do_not_set" selected="True">Do not set</option>
360 <option value="set">Set</option>
361 </param>
362 <when value="do_not_set">
363 <!-- do nothing here -->
364 </when>
365 <when value="set">
366 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
367 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
368 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
369 </when>
370 </conditional>
371
372 <!-- observation prior expectations -->
373 <conditional name="section_observation_prior_expectations_type">
374 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
375 <option value="do_not_set" selected="True">Do not set</option>
376 <option value="set">Set</option>
377 </param>
378 <when value="do_not_set">
379 <!-- do nothing here -->
380 </when>
381 <when value="set">
382 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" />
383 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
384 </when>
385 </conditional>
386
387
388 <!-- algorithmic features -->
389 <conditional name="section_algorithmic_features_type">
390 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
391 <option value="do_not_set" selected="True">Do not set</option>
392 <option value="set">Set</option>
393 </param>
394 <when value="do_not_set">
395 <!-- do nothing here -->
396 </when>
397 <when value="set">
398 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" />
399 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
400 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
401 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" />
402 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" />
403 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
404 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
405 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
406 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
407 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
408 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
409 </when>
410 </conditional>
411
412
413 </when>
414 </conditional>
415
416 </inputs>
417 <outputs> <!-- The VCF has no filter; the failed-alleles BED and the trace are kept only when their Advanced checkbox is ticked. -->
418 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (variants)" />
419 <data name="output_failed_alleles_bed" format="bed" label="${tool.name} on ${on_string} (failed alleles)">
420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter>
421 </data>
422 <data name="output_trace" format="txt" label="${tool.name} on ${on_string} (trace)">
423 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter>
424 </data>
425 </outputs>
426 <tests> <!-- Single test: history reference plus one BAM with basic options; the VCF is compared with compare="contains". -->
427 <test>
428 <param value="history" name="reference_source_selector" />
429 <param name="ref_file" value="phiX.fasta" ftype="fasta"/>
430 <param name="input_bam" value="fake_phiX_reads_1.bam" ftype="bam"/>
431 <param value="basic" name="options_type_selector"/>
432 <output name="output_vcf" compare="contains" file="freebayes_out_1.vcf.contains"/>
433 </test>
434 </tests>
435 <help>
436 **What it does**
437
438 This tool uses FreeBayes 0.9.9 to call SNPs given a reference sequence and a BAM alignment file.
439
440 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners.
441
442 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development.
443
444 Go `here &lt;http://bioinformatics.bc.edu/marthlab/FreeBayes&gt;`__ for details on FreeBayes,
445 or `here &lt;https://github.com/ekg/freebayes&gt;`__.
446
447 ------
448
449 **Inputs**
450
451 FreeBayes accepts an input aligned BAM file.
452
453
454 **Outputs**
455
456 The output is in the VCF format.
457
458 -------
459
460 **Settings**::
461
462 input and output:
463
464 -b --bam FILE Add FILE to the set of BAM files to be analyzed.
465 -c --stdin Read BAM input on stdin.
466 -v --vcf FILE Output VCF-format results to FILE.
467 -f --fasta-reference FILE
468 Use FILE as the reference sequence for analysis.
469 An index file (FILE.fai) will be created if none exists.
470 If neither --targets nor --region are specified, FreeBayes
471 will analyze every position in this reference.
472 -t --targets FILE
473 Limit analysis to targets listed in the BED-format FILE.
474 -r --region &lt;chrom&gt;:&lt;start_position&gt;..&lt;end_position&gt;
475 Limit analysis to the specified region, 0-base coordinates,
476 end_position not included (same as BED format).
477 -s --samples FILE
478 Limit analysis to samples listed (one per line) in the FILE.
479 By default FreeBayes will analyze all samples in its input
480 BAM files.
481 --populations FILE
482 Each line of FILE should list a sample and a population which
483 it is part of. The population-based bayesian inference model
484 will then be partitioned on the basis of the populations.
485 -A --cnv-map FILE
486 Read a copy number map from the BED file FILE, which has
487 the format:
488 reference sequence, start, end, sample name, copy number
489 ... for each region in each sample which does not have the
490 default copy number as set by --ploidy.
491 -L --trace FILE Output an algorithmic trace to FILE.
492 --failed-alleles FILE
493 Write a BED file of the analyzed positions which do not
494 pass --pvar to FILE.
495 -@ --variant-input VCF
496 Use variants reported in VCF file as input to the algorithm.
497 A report will be generated for every record in the VCF file.
498 -l --only-use-input-alleles
499 Only provide variant calls and genotype likelihoods for sites
500 and alleles which are provided in the VCF input, and provide
501 output in the VCF for all input alleles, not just those which
502 have support in the data.
503 --haplotype-basis-alleles VCF
504 When specified, only variant alleles provided in this input
505 VCF will be used for the construction of complex or haplotype
506 alleles.
507
508 reporting:
509
510 -P --pvar N Report sites if the probability that there is a polymorphism
511 at the site is greater than N. default: 0.0001
512 -_ --show-reference-repeats
513 Calculate and show information about reference repeats in
514 the VCF output.
515
516 population model:
517
518 -T --theta N The expected mutation rate or pairwise nucleotide diversity
519 among the population under analysis. This serves as the
520 single parameter to the Ewens Sampling Formula prior model
521 default: 0.001
522 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
523 -J --pooled Assume that samples result from pooled sequencing.
524 When using this flag, set --ploidy to the number of
525 alleles in each sample.
526
527 reference allele:
528
529 -Z --use-reference-allele
530 This flag includes the reference allele in the analysis as
531 if it is another sample from the same population.
532 -H --diploid-reference
533 If using the reference sequence as a sample (-Z),
534 treat it as diploid. default: false (reference is haploid)
535 --reference-quality MQ,BQ
536 Assign mapping quality of MQ to the reference allele at each
537 site and base quality of BQ. default: 100,60
538
539 allele scope:
540
541 -I --no-snps Ignore SNP alleles.
542 -i --no-indels Ignore insertion and deletion alleles.
543 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
544 -u --no-complex Ignore complex events (composites of other classes).
545 -n --use-best-n-alleles N
546 Evaluate only the best N SNP alleles, ranked by sum of
547 supporting quality scores. (Set to 0 to use all; default: all)
548 -E --max-complex-gap N
549 Allow complex alleles with contiguous embedded matches of up
550 to this length.
551
552 indel realignment:
553
554 -O --left-align-indels
555 Left-realign and merge gaps embedded in reads. default: false
556
557 input filters:
558
559 -4 --use-duplicate-reads
560 Include duplicate-marked alignments in the analysis.
561 default: exclude duplicates
562 -m --min-mapping-quality Q
563 Exclude alignments from analysis if they have a mapping
564 quality less than Q. default: 30
565 -q --min-base-quality Q
566 Exclude alleles from analysis if their supporting base
567 quality is less than Q. default: 20
568 -R --min-supporting-quality MQ,BQ
569 In order to consider an alternate allele, at least one supporting
570 alignment must have mapping quality MQ, and one supporting
571 allele must have base quality BQ. default: 0,0, unset
572 -Q --mismatch-base-quality-threshold Q
573 Count mismatches toward --read-mismatch-limit if the base
574 quality of the mismatch is &gt;= Q. default: 10
575 -U --read-mismatch-limit N
576 Exclude reads with more than N mismatches where each mismatch
577 has base quality &gt;= mismatch-base-quality-threshold.
578 default: ~unbounded
579 -z --read-max-mismatch-fraction N
580 Exclude reads with more than N [0,1] fraction of mismatches where
581 each mismatch has base quality &gt;= mismatch-base-quality-threshold
582 default: 1.0
583 -$ --read-snp-limit N
584 Exclude reads with more than N base mismatches, ignoring gaps
585 with quality &gt;= mismatch-base-quality-threshold.
586 default: ~unbounded
587 -e --read-indel-limit N
588 Exclude reads with more than N separate gaps.
589 default: ~unbounded
590 -0 --standard-filters Use stringent input base and mapping quality filters
591 Equivalent to -m 30 -q 20 -R 0 -S 0
592 -x --indel-exclusion-window
593 Ignore portions of alignments this many bases from a
594 putative insertion or deletion allele. default: 0
595 -F --min-alternate-fraction N
596 Require at least this fraction of observations supporting
597 an alternate allele within a single individual
598 in order to evaluate the position. default: 0.0
599 -C --min-alternate-count N
600 Require at least this count of observations supporting
601 an alternate allele within a single individual in order
602 to evaluate the position. default: 1
603 -3 --min-alternate-qsum N
604 Require at least this sum of quality of observations supporting
605 an alternate allele within a single individual in order
606 to evaluate the position. default: 0
607 -G --min-alternate-total N
608 Require at least this count of observations supporting
609 an alternate allele within the total population in order
610 to use the allele in analysis. default: 1
611 -! --min-coverage N
612 Require at least this coverage to process a site. default: 0
613
614 bayesian priors:
615
616 -Y --no-ewens-priors
617 Turns off the Ewens' Sampling Formula component of the priors.
618 -k --no-population-priors
619 Equivalent to --pooled --no-ewens-priors
620 -w --hwe-priors Use the probability of the combination arising under HWE given
621 the allele frequency as estimated by observation frequency.
622
623 observation prior expectations:
624
625 -V --binomial-obs-priors
626 Incorporate expectations about observations into the priors.
627 Uses read placement probability, strand balance probability,
628 and read position (5'-3') probability.
629 -a --allele-balance-priors
630 Use aggregate probability of observation balance between alleles
631 as a component of the priors. Best for observations with minimal
632 inherent reference bias.
633
634 algorithmic features:
635
636 -M --site-selection-max-iterations N
637 Uses hill-climbing algorithm to search posterior space for N
638 iterations to determine if the site should be evaluated. Set to 0
639 to prevent use of this algorithm for site selection, and
640 to a low integer for improved site selection at a slight
641 performance penalty. default: 5.
642 -B --genotyping-max-iterations N
643 Iterate no more than N times during genotyping step. default: 25.
644 --genotyping-max-banddepth N
645 Integrate no deeper than the Nth best genotype by likelihood when
646 genotyping. default: 6.
647 -W --posterior-integration-limits N,M
648 Integrate all genotype combinations in our posterior space
649 which include no more than N samples with their Mth best
650 data likelihood. default: 1,3.
651 -K --no-permute
652 Do not scale prior probability of genotype combination given allele
653 frequency by the number of permutations of included genotypes.
654 -N --exclude-unobserved-genotypes
655 Skip sample genotypings for which the sample has no supporting reads.
656 -S --genotype-variant-threshold N
657 Limit posterior integration to samples where the second-best
658 genotype likelihood is no more than log(N) from the highest
659 genotype likelihood for the sample. default: ~unbounded
660 -j --use-mapping-quality
661 Use mapping quality of alleles when calculating data likelihoods.
662 -D --read-dependence-factor N
663 Incorporate non-independence of reads by scaling successive
664 observations by this factor during data likelihood
665 calculations. default: 0.9
666 -= --no-marginals
667 Do not calculate the marginal probability of genotypes. Saves
668 time and improves scaling performance in large populations.
669
670
671 ------
672
673 **Citation**
674
675 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing &lt;http://arxiv.org/abs/1207.3907&gt;`_.
676
677 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
678
679 </help>
680 </tool>