Mercurial > repos > fubar > freebayes099
comparison freebayes-d291dc763c4c/freebayes.xml @ 0:9a7e8a919c78 draft
Uploaded
author | fubar |
---|---|
date | Sat, 28 Sep 2013 05:57:27 -0400 |
parents | |
children | 5022cd74093c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9a7e8a919c78 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="freebayes" name="FreeBayes" version="0.9.9"> | |
3 <stdio> | |
4 <regex match=".*" source="both" level="warning" description="freebayes said:"/> | |
5 </stdio> | |
6 <requirements> | |
7 <requirement type="package" version="0.9.9_c993c5c07e7673">freebayes</requirement> | |
8 <requirement type="package" version="0.1.18">samtools</requirement> | |
9 </requirements> | |
10 <description> - Bayesian genetic variant detector</description> | |
11 <command> | |
12 ##set up input files | |
13 #set $reference_fasta_filename = "localref.fa" | |
14 #if str( $reference_source.reference_source_selector ) == "history": | |
15 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && | |
16 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 && | |
17 #else: | |
18 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
19 #end if | |
20 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): | |
21 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" && | |
22 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && | |
23 #end for | |
24 ##finished setting up inputs | |
25 | |
26 ##start FreeBayes commandline | |
27 freebayes | |
28 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): | |
29 --bam "localbam_${bam_count}.bam" | |
30 #end for | |
31 --fasta-reference "${reference_fasta_filename}" | |
32 | |
33 ##outputs | |
34 --vcf "${output_vcf}" | |
35 | |
36 ##advanced options | |
37 #if str( $options_type.options_type_selector ) == "advanced": | |
38 ##additional outputs | |
39 #if $options_type.output_trace_option: | |
40 --trace "${output_trace}" | |
41 #end if | |
42 #if $options_type.output_failed_alleles_option: | |
43 --failed-alleles "${output_failed_alleles_bed}" | |
44 #end if | |
45 | |
46 ##additional inputs | |
47 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file": | |
48 --targets "${options_type.target_limit_type.input_target_bed}" | |
49 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region": | |
50 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}" | |
51 #end if | |
52 #if $options_type.input_sample_file: | |
53 --samples "${options_type.input_sample_file}" | |
54 #end if | |
55 #if $options_type.input_populations_file: | |
56 --populations "${options_type.input_populations_file}" | |
57 #end if | |
58 #if $options_type.input_cnv_map_bed: | |
59 --cnv-map "${options_type.input_cnv_map_bed}" | |
60 #end if | |
61 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf": | |
62 --variant-input "${options_type.input_variant_type.input_variant_vcf}" | |
63 ${options_type.input_variant_type.only_use_input_alleles} | |
64 #end if | |
65 #if $options_type.haplotype_basis_alleles: | |
66 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}" | |
67 #end if | |
68 | |
69 | |
70 ##reporting | |
71 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set": | |
72 --pvar "${options_type.section_reporting_type.pvar}" | |
73 ${options_type.section_reporting_type.show_reference_repeats} | |
74 #end if | |
75 | |
76 ##population model | |
77 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set": | |
78 --theta "${options_type.section_population_model_type.theta}" | |
79 --ploidy "${options_type.section_population_model_type.ploidy}" | |
80 ${options_type.section_population_model_type.pooled} | |
81 #end if | |
82 | |
83 ##reference allele | |
84 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele": | |
85 --use-reference-allele | |
86 ${options_type.use_reference_allele_type.diploid_reference} | |
87 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}" | |
88 #end if | |
89 | |
90 ##allele scope | |
91 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set": | |
92 ${options_type.section_allele_scope_type.no_snps} | |
93 ${options_type.section_allele_scope_type.no_indels} | |
94 ${options_type.section_allele_scope_type.no_mnps} | |
95 ${options_type.section_allele_scope_type.no_complex} | |
96 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}" | |
97 #if $options_type.section_allele_scope_type.max_complex_gap: | |
98 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}" | |
99 #end if | |
100 #end if | |
101 | |
102 ##indel realignment | |
103 ${options_type.left_align_indels} | |
104 | |
105 ##input filters | |
106 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set": | |
107 ${options_type.section_input_filters_type.use_duplicate_reads} | |
108 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters": | |
109 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}" | |
110 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}" | |
111 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}" | |
112 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters": | |
113 --standard-filters | |
114 #end if | |
115 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}" | |
116 #if $options_type.section_input_filters_type.read_mismatch_limit: | |
117 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}" | |
118 #end if | |
119 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}" | |
120 #if $options_type.section_input_filters_type.read_snp_limit: | |
121 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}" | |
122 #end if | |
123 #if $options_type.section_input_filters_type.read_indel_limit: | |
124 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}" | |
125 #end if | |
126 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}" | |
127 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}" | |
128 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}" | |
129 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}" | |
130 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}" | |
131 --min-coverage "${options_type.section_input_filters_type.min_coverage}" | |
132 #end if | |
133 | |
134 ##bayesian priors | |
135 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set": | |
136 ${options_type.section_bayesian_priors_type.no_ewens_priors} | |
137 ${options_type.section_bayesian_priors_type.no_population_priors} | |
138 ${options_type.section_bayesian_priors_type.hwe_priors} | |
139 #end if | |
140 | |
141 ##observation prior expectations | |
142 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set": | |
143 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors} | |
144 ${options_type.section_observation_prior_expectations_type.allele_balance_priors} | |
145 #end if | |
146 | |
147 ##algorithmic features | |
148 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set": | |
149 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}" | |
150 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}" | |
151 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}" | |
152 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}" | |
153 ${options_type.section_algorithmic_features_type.no_permute} | |
154 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes} | |
155 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold: | |
156 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}" | |
157 #end if | |
158 ${options_type.section_algorithmic_features_type.use_mapping_quality} | |
159 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}" | |
160 ${options_type.section_algorithmic_features_type.no_marginals} | |
161 #end if | |
162 | |
163 #end if | |
164 </command> | |
165 <inputs> | |
166 <conditional name="reference_source"> | |
167 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | |
168 <option value="cached">Locally cached</option> | |
169 <option value="history">History</option> | |
170 </param> | |
171 <when value="cached"> | |
172 <repeat name="input_bams" title="Sample BAM file" min="1"> | |
173 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
174 <validator type="unspecified_build" /> | |
175 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> | |
176 </param> | |
177 </repeat> | |
178 <param name="ref_file" type="select" label="Using reference genome"> | |
179 <options from_data_table="sam_fa_indexes"> | |
180 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...--> | |
181 </options> | |
182 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
183 </param> | |
184 </when> | |
185 <when value="history"> <!-- FIX ME!!!! --> | |
186 <repeat name="input_bams" title="Sample BAM file" min="1"> | |
187 <param name="input_bam" type="data" format="bam" label="BAM file" /> | |
188 </repeat> | |
189 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
190 </when> | |
191 </conditional> | |
192 | |
193 <conditional name="options_type"> | |
194 <param name="options_type_selector" type="select" label="Basic or Advanced options"> | |
195 <option value="basic" selected="True">Basic</option> | |
196 <option value="advanced">Advanced</option> | |
197 </param> | |
198 <when value="basic"> | |
199 <!-- Do nothing here --> | |
200 </when> | |
201 <when value="advanced"> | |
202 | |
203 <!-- output --> | |
204 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" /> | |
205 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" /> | |
206 | |
207 | |
208 <!-- input --> | |
209 <conditional name="target_limit_type"> | |
210 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets"> | |
211 <option value="do_not_limit" selected="True">Do not limit</option> | |
212 <option value="limit_by_target_file">Limit by target file</option> | |
213 <option value="limit_by_region">Limit to region</option> | |
214 </param> | |
215 <when value="do_not_limit"> | |
216 <!-- Do nothing here --> | |
217 </when> | |
218 <when value="limit_by_target_file"> | |
219 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." /> | |
220 </when> | |
221 <when value="limit_by_region"> | |
222 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? --> | |
223 <param name="region_start" type="integer" label="Region Start" value="" /> | |
224 <param name="region_end" type="integer" label="Region End" value="" /> | |
225 </when> | |
226 </conditional> | |
227 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" /> | |
228 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" /> | |
229 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" /> | |
230 <conditional name="input_variant_type"> | |
231 <param name="input_variant_type_selector" type="select" label="Provide variants file"> | |
232 <option value="do_not_provide" selected="True">Do not provide</option> | |
233 <option value="provide_vcf">Provide VCF file</option> | |
234 </param> | |
235 <when value="do_not_provide"> | |
236 <!-- Do nothing here --> | |
237 </when> | |
238 <when value="provide_vcf"> | |
239 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" /> | |
240 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" /> | |
241 </when> | |
242 </conditional> | |
243 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" /> | |
244 | |
245 <!-- reporting --> | |
246 <conditional name="section_reporting_type"> | |
247 <param name="section_reporting_type_selector" type="select" label="Set Reporting options"> | |
248 <option value="do_not_set" selected="True">Do not set</option> | |
249 <option value="set">Set</option> | |
250 </param> | |
251 <when value="do_not_set"> | |
252 <!-- do nothing here --> | |
253 </when> | |
254 <when value="set"> | |
255 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater than this value" value="0.0001" /> | |
256 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" /> | |
257 </when> | |
258 </conditional> | |
259 | |
260 | |
261 <!-- population model --> | |
262 <conditional name="section_population_model_type"> | |
263 <param name="section_population_model_type_selector" type="select" label="Set population model options"> | |
264 <option value="do_not_set" selected="True">Do not set</option> | |
265 <option value="set">Set</option> | |
266 </param> | |
267 <when value="do_not_set"> | |
268 <!-- do nothing here --> | |
269 </when> | |
270 <when value="set"> | |
271 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/> | |
272 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" /> | |
273 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." /> | |
274 </when> | |
275 </conditional> | |
276 | |
277 <!-- reference allele --> | |
278 <conditional name="use_reference_allele_type"> | |
279 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis"> | |
280 <option value="do_not_include_reference_allele" selected="True">Do not include</option> | |
281 <option value="include_reference_allele">Include</option> | |
282 </param> | |
283 <when value="do_not_include_reference_allele"> | |
284 <!-- Do nothing here --> | |
285 </when> | |
286 <when value="include_reference_allele"> | |
287 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" /> | |
288 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" /> | |
289 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" /> | |
290 </when> | |
291 </conditional> | |
292 | |
293 <!-- allele scope --> | |
294 <conditional name="section_allele_scope_type"> | |
295 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options"> | |
296 <option value="do_not_set" selected="True">Do not set</option> | |
297 <option value="set">Set</option> | |
298 </param> | |
299 <when value="do_not_set"> | |
300 <!-- do nothing here --> | |
301 </when> | |
302 <when value="set"> | |
303 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" /> | |
304 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" /> | |
305 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" /> | |
306 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" /> | |
307 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" /> | |
308 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/> | |
309 </when> | |
310 </conditional> | |
311 | |
312 <!-- indel realignment --> | |
313 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" /> | |
314 | |
315 <!-- input filters --> | |
316 <conditional name="section_input_filters_type"> | |
317 <param name="section_input_filters_type_selector" type="select" label="Set input filters options"> | |
318 <option value="do_not_set" selected="True">Do not set</option> | |
319 <option value="set">Set</option> | |
320 </param> | |
321 <when value="do_not_set"> | |
322 <!-- do nothing here --> | |
323 </when> | |
324 <when value="set"> | |
325 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" /> | |
326 <conditional name="quality_filter_type"> | |
327 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters"> | |
328 <option value="standard_filters" selected="True">Apply standard</option> | |
329 <option value="apply_filters">Apply specified</option> | |
330 </param> | |
331 <when value="standard_filters"> | |
332 <!-- Do nothing here --> <!-- standard-filters --> | |
333 </when> | |
334 <when value="apply_filters"> | |
335 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" /> | |
336 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="0" /> | |
337 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" /> | |
338 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" /> | |
339 </when> | |
340 </conditional> | |
341 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is >=" value="10" /> | |
342 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="" optional="True" /> | |
343 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="1.0" /> | |
344 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold" value="" optional="True" /> | |
345 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" /> | |
346 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" /> | |
347 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> | |
348 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" /> | |
349 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> | |
350 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" /> | |
351 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" /> | |
352 </when> | |
353 </conditional> | |
354 | |
355 | |
356 <!-- bayesian priors --> | |
357 <conditional name="section_bayesian_priors_type"> | |
358 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options"> | |
359 <option value="do_not_set" selected="True">Do not set</option> | |
360 <option value="set">Set</option> | |
361 </param> | |
362 <when value="do_not_set"> | |
363 <!-- do nothing here --> | |
364 </when> | |
365 <when value="set"> | |
366 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" /> | |
367 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" /> | |
368 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" /> | |
369 </when> | |
370 </conditional> | |
371 | |
372 <!-- observation prior expectations --> | |
373 <conditional name="section_observation_prior_expectations_type"> | |
374 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options"> | |
375 <option value="do_not_set" selected="True">Do not set</option> | |
376 <option value="set">Set</option> | |
377 </param> | |
378 <when value="do_not_set"> | |
379 <!-- do nothing here --> | |
380 </when> | |
381 <when value="set"> | |
382 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" /> | |
383 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" /> | |
384 </when> | |
385 </conditional> | |
386 | |
387 | |
388 <!-- algorithmic features --> | |
389 <conditional name="section_algorithmic_features_type"> | |
390 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options"> | |
391 <option value="do_not_set" selected="True">Do not set</option> | |
392 <option value="set">Set</option> | |
393 </param> | |
394 <when value="do_not_set"> | |
395 <!-- do nothing here --> | |
396 </when> | |
397 <when value="set"> | |
398 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" /> | |
399 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" /> | |
400 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" /> | |
401 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" /> | |
402 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" /> | |
403 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" /> | |
404 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" /> | |
405 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" /> | |
406 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" /> | |
407 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" /> | |
408 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" /> | |
409 </when> | |
410 </conditional> | |
411 | |
412 | |
413 </when> | |
414 </conditional> | |
415 | |
416 </inputs> | |
417 <outputs> | |
418 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> | |
419 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> | |
420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter> | |
421 </data> | |
422 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> | |
423 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter> | |
424 </data> | |
425 </outputs> | |
426 <tests> | |
427 <test> | |
428 <param name="reference_source_selector" value="history" /> | |
429 <param name="ref_file" ftype="fasta" value="phiX.fasta"/> | |
430 <param name="input_bam" ftype="bam" value="fake_phiX_reads_1.bam"/> | |
431 <param name="options_type_selector" value="basic"/> | |
432 <output name="output_vcf" file="freebayes_out_1.vcf.contains" compare="contains"/> | |
433 </test> | |
434 </tests> | |
435 <help> | |
436 **What it does** | |
437 | |
438 This tool uses FreeBayes 0.9.9 to call SNPs given a reference sequence and a BAM alignment file. | |
439 | |
440 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners. | |
441 | |
442 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development. | |
443 | |
444 Go `here <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_ for details on FreeBayes. | |
445 | |
446 or `here <https://github.com/ekg/freebayes>`_ | |
447 ------ | |
448 | |
449 **Inputs** | |
450 | |
451 FreeBayes accepts an input aligned BAM file. | |
452 | |
453 | |
454 **Outputs** | |
455 | |
456 The output is in the VCF format. | |
457 | |
458 ------- | |
459 | |
460 **Settings**:: | |
461 | |
462 input and output: | |
463 | |
464 -b --bam FILE Add FILE to the set of BAM files to be analyzed. | |
465 -c --stdin Read BAM input on stdin. | |
466 -v --vcf FILE Output VCF-format results to FILE. | |
467 -f --fasta-reference FILE | |
468 Use FILE as the reference sequence for analysis. | |
469 An index file (FILE.fai) will be created if none exists. | |
470 If neither --targets nor --region are specified, FreeBayes | |
471 will analyze every position in this reference. | |
472 -t --targets FILE | |
473 Limit analysis to targets listed in the BED-format FILE. | |
474 -r --region <chrom>:<start_position>..<end_position> | |
475 Limit analysis to the specified region, 0-base coordinates, | |
476 end_position not included (same as BED format). | |
477 -s --samples FILE | |
478 Limit analysis to samples listed (one per line) in the FILE. | |
479 By default FreeBayes will analyze all samples in its input | |
480 BAM files. | |
481 --populations FILE | |
482 Each line of FILE should list a sample and a population which | |
483 it is part of. The population-based bayesian inference model | |
484 will then be partitioned on the basis of the populations. | |
485 -A --cnv-map FILE | |
486 Read a copy number map from the BED file FILE, which has | |
487 the format: | |
488 reference sequence, start, end, sample name, copy number | |
489 ... for each region in each sample which does not have the | |
490 default copy number as set by --ploidy. | |
491 -L --trace FILE Output an algorithmic trace to FILE. | |
492 --failed-alleles FILE | |
493 Write a BED file of the analyzed positions which do not | |
494 pass --pvar to FILE. | |
495 -@ --variant-input VCF | |
496 Use variants reported in VCF file as input to the algorithm. | |
497 A report will be generated for every record in the VCF file. | |
498 -l --only-use-input-alleles | |
499 Only provide variant calls and genotype likelihoods for sites | |
500 and alleles which are provided in the VCF input, and provide | |
501 output in the VCF for all input alleles, not just those which | |
502 have support in the data. | |
503 --haplotype-basis-alleles VCF | |
504 When specified, only variant alleles provided in this input | |
505 VCF will be used for the construction of complex or haplotype | |
506 alleles. | |
507 | |
508 reporting: | |
509 | |
510 -P --pvar N Report sites if the probability that there is a polymorphism | |
511 at the site is greater than N. default: 0.0001 | |
512 -_ --show-reference-repeats | |
513 Calculate and show information about reference repeats in | |
514 the VCF output. | |
515 | |
516 population model: | |
517 | |
518 -T --theta N The expected mutation rate or pairwise nucleotide diversity | |
519 among the population under analysis. This serves as the | |
520 single parameter to the Ewens Sampling Formula prior model | |
521 default: 0.001 | |
522 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2 | |
523 -J --pooled Assume that samples result from pooled sequencing. | |
524 When using this flag, set --ploidy to the number of | |
525 alleles in each sample. | |
526 | |
527 reference allele: | |
528 | |
529 -Z --use-reference-allele | |
530 This flag includes the reference allele in the analysis as | |
531 if it is another sample from the same population. | |
532 -H --diploid-reference | |
533 If using the reference sequence as a sample (-Z), | |
534 treat it as diploid. default: false (reference is haploid) | |
535 --reference-quality MQ,BQ | |
536 Assign mapping quality of MQ to the reference allele at each | |
537 site and base quality of BQ. default: 100,60 | |
538 | |
539 allele scope: | |
540 | |
541 -I --no-snps Ignore SNP alleles. | |
542 -i --no-indels Ignore insertion and deletion alleles. | |
543 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs. | |
544 -u --no-complex Ignore complex events (composites of other classes). | |
545 -n --use-best-n-alleles N | |
546 Evaluate only the best N SNP alleles, ranked by sum of | |
547 supporting quality scores. (Set to 0 to use all; default: all) | |
548 -E --max-complex-gap N | |
549 Allow complex alleles with contiguous embedded matches of up | |
550 to this length. | |
551 | |
552 indel realignment: | |
553 | |
554 -O --left-align-indels | |
555 Left-realign and merge gaps embedded in reads. default: false | |
556 | |
557 input filters: | |
558 | |
559 -4 --use-duplicate-reads | |
560 Include duplicate-marked alignments in the analysis. | |
561 default: exclude duplicates | |
562 -m --min-mapping-quality Q | |
563 Exclude alignments from analysis if they have a mapping | |
564 quality less than Q. default: 30 | |
565 -q --min-base-quality Q | |
566 Exclude alleles from analysis if their supporting base | |
567 quality is less than Q. default: 20 | |
568 -R --min-supporting-quality MQ,BQ | |
569 In order to consider an alternate allele, at least one supporting | |
570 alignment must have mapping quality MQ, and one supporting | |
571 allele must have base quality BQ. default: 0,0, unset | |
572 -Q --mismatch-base-quality-threshold Q | |
573 Count mismatches toward --read-mismatch-limit if the base | |
574 quality of the mismatch is >= Q. default: 10 | |
575 -U --read-mismatch-limit N | |
576 Exclude reads with more than N mismatches where each mismatch | |
577 has base quality >= mismatch-base-quality-threshold. | |
578 default: ~unbounded | |
579 -z --read-max-mismatch-fraction N | |
580 Exclude reads with more than N [0,1] fraction of mismatches where | |
581 each mismatch has base quality >= mismatch-base-quality-threshold | |
582 default: 1.0 | |
583 -$ --read-snp-limit N | |
584 Exclude reads with more than N base mismatches, ignoring gaps | |
585 with quality >= mismatch-base-quality-threshold. | |
586 default: ~unbounded | |
587 -e --read-indel-limit N | |
588 Exclude reads with more than N separate gaps. | |
589 default: ~unbounded | |
590 -0 --standard-filters Use stringent input base and mapping quality filters | |
591 Equivalent to -m 30 -q 20 -R 0 -S 0 | |
592 -x --indel-exclusion-window | |
593 Ignore portions of alignments this many bases from a | |
594 putative insertion or deletion allele. default: 0 | |
595 -F --min-alternate-fraction N | |
596 Require at least this fraction of observations supporting | |
597 an alternate allele within a single individual in order | |
598 to evaluate the position. default: 0.0 | |
599 -C --min-alternate-count N | |
600 Require at least this count of observations supporting | |
601 an alternate allele within a single individual in order | |
602 to evaluate the position. default: 1 | |
603 -3 --min-alternate-qsum N | |
604 Require at least this sum of quality of observations supporting | |
605 an alternate allele within a single individual in order | |
606 to evaluate the position. default: 0 | |
607 -G --min-alternate-total N | |
608 Require at least this count of observations supporting | |
609 an alternate allele within the total population in order | |
610 to use the allele in analysis. default: 1 | |
611 -! --min-coverage N | |
612 Require at least this coverage to process a site. default: 0 | |
613 | |
614 bayesian priors: | |
615 | |
616 -Y --no-ewens-priors | |
617 Turns off the Ewens' Sampling Formula component of the priors. | |
618 -k --no-population-priors | |
619 Equivalent to --pooled --no-ewens-priors | |
620 -w --hwe-priors Use the probability of the combination arising under HWE given | |
621 the allele frequency as estimated by observation frequency. | |
622 | |
623 observation prior expectations: | |
624 | |
625 -V --binomial-obs-priors | |
626 Incorporate expectations about observations into the priors. | |
627 Uses read placement probability, strand balance probability, | |
628 and read position (5'-3') probability. | |
629 -a --allele-balance-priors | |
630 Use aggregate probability of observation balance between alleles | |
631 as a component of the priors. Best for observations with minimal | |
632 inherent reference bias. | |
633 | |
634 algorithmic features: | |
635 | |
636 -M --site-selection-max-iterations N | |
637 Uses hill-climbing algorithm to search posterior space for N | |
638 iterations to determine if the site should be evaluated. Set to 0 | |
639 to prevent use of this algorithm for site selection, and | |
640 to a low integer for improved site selection at a slight | |
641 performance penalty. default: 5. | |
642 -B --genotyping-max-iterations N | |
643 Iterate no more than N times during genotyping step. default: 25. | |
644 --genotyping-max-banddepth N | |
645 Integrate no deeper than the Nth best genotype by likelihood when | |
646 genotyping. default: 6. | |
647 -W --posterior-integration-limits N,M | |
648 Integrate all genotype combinations in our posterior space | |
649 which include no more than N samples with their Mth best | |
650 data likelihood. default: 1,3. | |
651 -K --no-permute | |
652 Do not scale prior probability of genotype combination given allele | |
653 frequency by the number of permutations of included genotypes. | |
654 -N --exclude-unobserved-genotypes | |
655 Skip sample genotypings for which the sample has no supporting reads. | |
656 -S --genotype-variant-threshold N | |
657 Limit posterior integration to samples where the second-best | |
658 genotype likelihood is no more than log(N) from the highest | |
659 genotype likelihood for the sample. default: ~unbounded | |
660 -j --use-mapping-quality | |
661 Use mapping quality of alleles when calculating data likelihoods. | |
662 -D --read-dependence-factor N | |
663 Incorporate non-independence of reads by scaling successive | |
664 observations by this factor during data likelihood | |
665 calculations. default: 0.9 | |
666 -= --no-marginals | |
667 Do not calculate the marginal probability of genotypes. Saves | |
668 time and improves scaling performance in large populations. | |
669 | |
670 | |
671 ------ | |
672 | |
673 **Citation** | |
674 | |
675 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_. | |
676 | |
677 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | |
678 | |
679 </help> | |
680 </tool> |