comparison obo_scripts.xml @ 1:569830319099 draft

adding test files
author jorrit <jhpoelen@xs4all.nl>
date Thu, 07 Feb 2013 13:32:43 -0800
parents
children
comparison
equal deleted inserted replaced
0:0108dcc237ea 1:569830319099
1 <?xml version="1.0"?>
2 <tool id="fetch_obo_ontology2" name="FetchOboOntology2" version="0.0.8">
3 <requirements>
4 <requirement type="package" version="0.9.6_9608597d12e127c847ae03aa03440ab63992fedf">freebayes</requirement>
5 <requirement type="package" version="0.1.18">samtools</requirement>
6 </requirements>
7 <description> - obo scripts</description>
8 <command>
9 ##set up input files
10 #set $reference_fasta_filename = "localref.fa"
11 #if str( $reference_source.reference_source_selector ) == "history":
12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
13 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for FreeBayes" &gt;&amp;2 &amp;&amp;
14 #else:
15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
16 #end if
17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
20 #end for
21 ##finished setting up inputs
22
23 ##start FreeBayes commandline
24 freebayes
25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
26 --bam "localbam_${bam_count}.bam"
27 #end for
28 --fasta-reference "${reference_fasta_filename}"
29
30 ##outputs
31 --vcf "${output_vcf}"
32
33 ##advanced options
34 #if str( $options_type.options_type_selector ) == "advanced":
35 ##additional outputs
36 #if $options_type.output_trace_option:
37 --trace "${output_trace}"
38 #end if
39 #if $options_type.output_failed_alleles_option:
40 --failed-alleles "${output_failed_alleles_bed}"
41 #end if
42
43 ##additional inputs
44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
45 --targets "${options_type.target_limit_type.input_target_bed}"
46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
48 #end if
49 #if $options_type.input_sample_file:
50 --samples "${options_type.input_sample_file}"
51 #end if
52 #if $options_type.input_populations_file:
53 --populations "${options_type.input_populations_file}"
54 #end if
55 #if $options_type.input_cnv_map_bed:
56 --cnv-map "${options_type.input_cnv_map_bed}"
57 #end if
58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
59 --variant-input "${options_type.input_variant_type.input_variant_vcf}"
60 ${options_type.input_variant_type.only_use_input_alleles}
61 #end if
62 #if $options_type.haplotype_basis_alleles:
63 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}"
64 #end if
65
66
67 ##reporting
68 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
69 --pvar "${options_type.section_reporting_type.pvar}"
70 ${options_type.section_reporting_type.show_reference_repeats}
71 #end if
72
73 ##population model
74 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
75 --theta "${options_type.section_population_model_type.theta}"
76 --ploidy "${options_type.section_population_model_type.ploidy}"
77 ${options_type.section_population_model_type.pooled}
78 #end if
79
80 ##reference allele
81 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
82 --use-reference-allele
83 ${options_type.use_reference_allele_type.diploid_reference}
84 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
85 #end if
86
87 ##allele scope
88 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
89 ${options_type.section_allele_scope_type.no_snps}
90 ${options_type.section_allele_scope_type.no_indels}
91 ${options_type.section_allele_scope_type.no_mnps}
92 ${options_type.section_allele_scope_type.no_complex}
93 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
94 #if $options_type.section_allele_scope_type.max_complex_gap:
95 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
96 #end if
97 #end if
98
99 ##indel realignment
100 ${options_type.left_align_indels}
101
102 ##input filters
103 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
104 ${options_type.section_input_filters_type.use_duplicate_reads}
105 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters":
106 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}"
107 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}"
108 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}"
109 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters":
110 --standard-filters
111 #end if
112 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
113 #if $options_type.section_input_filters_type.read_mismatch_limit:
114 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
115 #end if
116 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
117 #if $options_type.section_input_filters_type.read_snp_limit:
118 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
119 #end if
120 #if $options_type.section_input_filters_type.read_indel_limit:
121 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
122 #end if
123 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
124 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
125 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
126 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
127 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
128 --min-coverage "${options_type.section_input_filters_type.min_coverage}"
129 #end if
130
131 ##bayesian priors
132 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
133 ${options_type.section_bayesian_priors_type.no_ewens_priors}
134 ${options_type.section_bayesian_priors_type.no_population_priors}
135 ${options_type.section_bayesian_priors_type.hwe_priors}
136 #end if
137
138 ##observation prior expectations
139 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
140 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
141 ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
142 #end if
143
144 ##algorithmic features
145 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
146 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
147 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
148 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
149 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
150 ${options_type.section_algorithmic_features_type.no_permute}
151 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
152 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold:
153 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
154 #end if
155 ${options_type.section_algorithmic_features_type.use_mapping_quality}
156 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
157 ${options_type.section_algorithmic_features_type.no_marginals}
158 #end if
159
160 #end if
161 </command>
162 <inputs>
163 <conditional name="reference_source">
164 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
165 <option value="cached">Locally cached</option>
166 <option value="history">History</option>
167 </param>
168 <when value="cached">
169 <repeat name="input_bams" title="Sample BAM file" min="1">
170 <param name="input_bam" type="data" format="bam" label="BAM file">
171 <validator type="unspecified_build" />
172 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." />
173 </param>
174 </repeat>
175 <param name="ref_file" type="select" label="Using reference genome">
176 <options from_data_table="sam_fa_indexes">
177 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
178 </options>
179 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
180 </param>
181 </when>
182 <when value="history"> <!-- FIX ME!!!! -->
183 <repeat name="input_bams" title="Sample BAM file" min="1">
184 <param name="input_bam" type="data" format="bam" label="BAM file" />
185 </repeat>
186 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
187 </when>
188 </conditional>
189
190 <conditional name="options_type">
191 <param name="options_type_selector" type="select" label="Basic or Advanced options">
192 <option value="basic" selected="True">Basic</option>
193 <option value="advanced">Advanced</option>
194 </param>
195 <when value="basic">
196 <!-- Do nothing here -->
197 </when>
198 <when value="advanced">
199
200 <!-- output -->
201 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" />
202 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" />
203
204
205 <!-- input -->
206 <conditional name="target_limit_type">
207 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets">
208 <option value="do_not_limit" selected="True">Do not limit</option>
209 <option value="limit_by_target_file">Limit by target file</option>
210 <option value="limit_by_region">Limit to region</option>
211 </param>
212 <when value="do_not_limit">
213 <!-- Do nothing here -->
214 </when>
215 <when value="limit_by_target_file">
216 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." />
217 </when>
218 <when value="limit_by_region">
219 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? -->
220 <param name="region_start" type="integer" label="Region Start" value="" />
221 <param name="region_end" type="integer" label="Region End" value="" />
222 </when>
223 </conditional>
224 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" />
225 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" />
226 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" />
227 <conditional name="input_variant_type">
228 <param name="input_variant_type_selector" type="select" label="Provide variants file">
229 <option value="do_not_provide" selected="True">Do not provide</option>
230 <option value="provide_vcf">Provide VCF file</option>
231 </param>
232 <when value="do_not_provide">
233 <!-- Do nothing here -->
234 </when>
235 <when value="provide_vcf">
236 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" />
237 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
238 </when>
239 </conditional>
240 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" />
241
242 <!-- reporting -->
243 <conditional name="section_reporting_type">
244 <param name="section_reporting_type_selector" type="select" label="Set Reporting options">
245 <option value="do_not_set" selected="True">Do not set</option>
246 <option value="set">Set</option>
247 </param>
248 <when value="do_not_set">
249 <!-- do nothing here -->
250 </when>
251 <when value="set">
252 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater than" value="0.0001" />
253 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
254 </when>
255 </conditional>
256
257
258 <!-- population model -->
259 <conditional name="section_population_model_type">
260 <param name="section_population_model_type_selector" type="select" label="Set population model options">
261 <option value="do_not_set" selected="True">Do not set</option>
262 <option value="set">Set</option>
263 </param>
264 <when value="do_not_set">
265 <!-- do nothing here -->
266 </when>
267 <when value="set">
268 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
269 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
270 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
271 </when>
272 </conditional>
273
274 <!-- reference allele -->
275 <conditional name="use_reference_allele_type">
276 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
277 <option value="do_not_include_reference_allele" selected="True">Do not include</option>
278 <option value="include_reference_allele">Include</option>
279 </param>
280 <when value="do_not_include_reference_allele">
281 <!-- Do nothing here -->
282 </when>
283 <when value="include_reference_allele">
284 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
285 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
286 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
287 </when>
288 </conditional>
289
290 <!-- allele scope -->
291 <conditional name="section_allele_scope_type">
292 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
293 <option value="do_not_set" selected="True">Do not set</option>
294 <option value="set">Set</option>
295 </param>
296 <when value="do_not_set">
297 <!-- do nothing here -->
298 </when>
299 <when value="set">
300 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
301 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
302 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nuceotide polymorphisms, MNPs" />
303 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
304 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
305 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
306 </when>
307 </conditional>
308
309 <!-- indel realignment -->
310 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />
311
312 <!-- input filters -->
313 <conditional name="section_input_filters_type">
314 <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
315 <option value="do_not_set" selected="True">Do not set</option>
316 <option value="set">Set</option>
317 </param>
318 <when value="do_not_set">
319 <!-- do nothing here -->
320 </when>
321 <when value="set">
322 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
323 <conditional name="quality_filter_type">
324 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters">
325 <option value="standard_filters" selected="True">Apply standard</option>
326 <option value="apply_filters">Apply specified</option>
327 </param>
328 <when value="standard_filters">
329 <!-- Do nothing here --> <!-- standard-filters -->
330 </when>
331 <when value="apply_filters">
332 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" />
333 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality is less than" value="0" />
334 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
335 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
336 </when>
337 </conditional>
338 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is &gt;=" value="10" />
339 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
340 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="1.0" />
341 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
342 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
343 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
344 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
345 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
346 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
347 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
348 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
349 </when>
350 </conditional>
351
352
353 <!-- bayesian priors -->
354 <conditional name="section_bayesian_priors_type">
355 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
356 <option value="do_not_set" selected="True">Do not set</option>
357 <option value="set">Set</option>
358 </param>
359 <when value="do_not_set">
360 <!-- do nothing here -->
361 </when>
362 <when value="set">
363 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
364 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
365 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
366 </when>
367 </conditional>
368
369 <!-- observation prior expectations -->
370 <conditional name="section_observation_prior_expectations_type">
371 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
372 <option value="do_not_set" selected="True">Do not set</option>
373 <option value="set">Set</option>
374 </param>
375 <when value="do_not_set">
376 <!-- do nothing here -->
377 </when>
378 <when value="set">
379 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" />
380 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
381 </when>
382 </conditional>
383
384
385 <!-- algorithmic features -->
386 <conditional name="section_algorithmic_features_type">
387 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
388 <option value="do_not_set" selected="True">Do not set</option>
389 <option value="set">Set</option>
390 </param>
391 <when value="do_not_set">
392 <!-- do nothing here -->
393 </when>
394 <when value="set">
395 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" />
396 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
397 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
398 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" />
399 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" />
400 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
401 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
402 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
403 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
404 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
405 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
406 </when>
407 </conditional>
408
409
410 </when>
411 </conditional>
412
413 </inputs>
414 <outputs>
415 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" />
416 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)">
417 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter>
418 </data>
419 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)">
420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter>
421 </data>
422 </outputs>
423 <tests>
424 <test>
425 <param name="reference_source_selector" value="history" />
426 <param name="ref_file" ftype="fasta" value="phiX.fasta"/>
427 <param name="input_bam" ftype="bam" value="fake_phiX_reads_1.bam"/>
428 <param name="options_type_selector" value="basic"/>
429 <output name="output_vcf" file="freebayes_out_1.vcf.contains" compare="contains"/>
430 </test>
431 </tests>
432 <help>
433 **What it does**
434
435 This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file.
436
437 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners.
438
439 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development.
440
441 Go `here &lt;http://bioinformatics.bc.edu/marthlab/FreeBayes&gt;`_ for details on FreeBayes.
442
443 ------
444
445 **Inputs**
446
447 FreeBayes accepts an input aligned BAM file.
448
449
450 **Outputs**
451
452 The output is in the VCF format.
453
454 -------
455
456 **Settings**::
457
458 input and output:
459
460 -b --bam FILE Add FILE to the set of BAM files to be analyzed.
461 -c --stdin Read BAM input on stdin.
462 -v --vcf FILE Output VCF-format results to FILE.
463 -f --fasta-reference FILE
464 Use FILE as the reference sequence for analysis.
465 An index file (FILE.fai) will be created if none exists.
466 If neither --targets nor --region are specified, FreeBayes
467 will analyze every position in this reference.
468 -t --targets FILE
469 Limit analysis to targets listed in the BED-format FILE.
470 -r --region &lt;chrom&gt;:&lt;start_position&gt;..&lt;end_position&gt;
471 Limit analysis to the specified region, 0-base coordinates,
472 end_position not included (same as BED format).
473 -s --samples FILE
474 Limit analysis to samples listed (one per line) in the FILE.
475 By default FreeBayes will analyze all samples in its input
476 BAM files.
477 --populations FILE
478 Each line of FILE should list a sample and a population which
479 it is part of. The population-based bayesian inference model
480 will then be partitioned on the basis of the populations.
481 -A --cnv-map FILE
482 Read a copy number map from the BED file FILE, which has
483 the format:
484 reference sequence, start, end, sample name, copy number
485 ... for each region in each sample which does not have the
486 default copy number as set by --ploidy.
487 -L --trace FILE Output an algorithmic trace to FILE.
488 --failed-alleles FILE
489 Write a BED file of the analyzed positions which do not
490 pass --pvar to FILE.
491 -@ --variant-input VCF
492 Use variants reported in VCF file as input to the algorithm.
493 A report will be generated for every record in the VCF file.
494 -l --only-use-input-alleles
495 Only provide variant calls and genotype likelihoods for sites
496 and alleles which are provided in the VCF input, and provide
497 output in the VCF for all input alleles, not just those which
498 have support in the data.
499 --haplotype-basis-alleles VCF
500 When specified, only variant alleles provided in this input
501 VCF will be used for the construction of complex or haplotype
502 alleles.
503
504 reporting:
505
506 -P --pvar N Report sites if the probability that there is a polymorphism
507 at the site is greater than N. default: 0.0001
508 -_ --show-reference-repeats
509 Calculate and show information about reference repeats in
510 the VCF output.
511
512 population model:
513
514 -T --theta N The expected mutation rate or pairwise nucleotide diversity
515 among the population under analysis. This serves as the
516 single parameter to the Ewens Sampling Formula prior model
517 default: 0.001
518 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
519 -J --pooled Assume that samples result from pooled sequencing.
520 When using this flag, set --ploidy to the number of
521 alleles in each sample.
522
523 reference allele:
524
525 -Z --use-reference-allele
526 This flag includes the reference allele in the analysis as
527 if it is another sample from the same population.
528 -H --diploid-reference
529 If using the reference sequence as a sample (-Z),
530 treat it as diploid. default: false (reference is haploid)
531 --reference-quality MQ,BQ
532 Assign mapping quality of MQ to the reference allele at each
533 site and base quality of BQ. default: 100,60
534
535 allele scope:
536
537 -I --no-snps Ignore SNP alleles.
538 -i --no-indels Ignore insertion and deletion alleles.
539 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
540 -u --no-complex Ignore complex events (composites of other classes).
541 -n --use-best-n-alleles N
542 Evaluate only the best N SNP alleles, ranked by sum of
543 supporting quality scores. (Set to 0 to use all; default: all)
544 -E --max-complex-gap N
545 Allow complex alleles with contiguous embedded matches of up
546 to this length.
547
548 indel realignment:
549
550 -O --left-align-indels
551 Left-realign and merge gaps embedded in reads. default: false
552
553 input filters:
554
555 -4 --use-duplicate-reads
556 Include duplicate-marked alignments in the analysis.
557 default: exclude duplicates
558 -m --min-mapping-quality Q
559 Exclude alignments from analysis if they have a mapping
560 quality less than Q. default: 30
561 -q --min-base-quality Q
562 Exclude alleles from analysis if their supporting base
563 quality is less than Q. default: 20
564 -R --min-supporting-quality MQ,BQ
565 In order to consider an alternate allele, at least one supporting
566 alignment must have mapping quality MQ, and one supporting
567 allele must have base quality BQ. default: 0,0, unset
568 -Q --mismatch-base-quality-threshold Q
569 Count mismatches toward --read-mismatch-limit if the base
570 quality of the mismatch is &gt;= Q. default: 10
571 -U --read-mismatch-limit N
572 Exclude reads with more than N mismatches where each mismatch
573 has base quality &gt;= mismatch-base-quality-threshold.
574 default: ~unbounded
575 -z --read-max-mismatch-fraction N
576 Exclude reads with more than N [0,1] fraction of mismatches where
577 each mismatch has base quality &gt;= mismatch-base-quality-threshold
578 default: 1.0
579 -$ --read-snp-limit N
580 Exclude reads with more than N base mismatches, ignoring gaps
581 with quality &gt;= mismatch-base-quality-threshold.
582 default: ~unbounded
583 -e --read-indel-limit N
584 Exclude reads with more than N separate gaps.
585 default: ~unbounded
586 -0 --standard-filters Use stringent input base and mapping quality filters
587 Equivalent to -m 30 -q 20 -R 0 -S 0
588 -x --indel-exclusion-window
589 Ignore portions of alignments this many bases from a
590 putative insertion or deletion allele. default: 0
591 -F --min-alternate-fraction N
592 Require at least this fraction of observations supporting
593 an alternate allele within a single individual
594 in order to evaluate the position. default: 0.0
595 -C --min-alternate-count N
596 Require at least this count of observations supporting
597 an alternate allele within a single individual in order
598 to evaluate the position. default: 1
599 -3 --min-alternate-qsum N
600 Require at least this sum of quality of observations supporting
601 an alternate allele within a single individual in order
602 to evaluate the position. default: 0
603 -G --min-alternate-total N
604 Require at least this count of observations supporting
605 an alternate allele within the total population in order
606 to use the allele in analysis. default: 1
607 -! --min-coverage N
608 Require at least this coverage to process a site. default: 0
609
610 bayesian priors:
611
612 -Y --no-ewens-priors
613 Turns off the Ewens' Sampling Formula component of the priors.
614 -k --no-population-priors
615 Equivalent to --pooled --no-ewens-priors
616 -w --hwe-priors Use the probability of the combination arising under HWE given
617 the allele frequency as estimated by observation frequency.
618
619 observation prior expectations:
620
621 -V --binomial-obs-priors
622 Incorporate expectations about observations into the priors.
623 Uses read placement probability, strand balance probability,
624 and read position (5'-3') probability.
625 -a --allele-balance-priors
626 Use aggregate probability of observation balance between alleles
627 as a component of the priors. Best for observations with minimal
628 inherent reference bias.
629
630 algorithmic features:
631
632 -M --site-selection-max-iterations N
633 Uses hill-climbing algorithm to search posterior space for N
634 iterations to determine if the site should be evaluated. Set to 0
635 to prevent use of this algorithm for site selection, and
636 to a low integer for improved site selection at a slight
637 performance penalty. default: 5.
638 -B --genotyping-max-iterations N
639 Iterate no more than N times during genotyping step. default: 25.
640 --genotyping-max-banddepth N
641 Integrate no deeper than the Nth best genotype by likelihood when
642 genotyping. default: 6.
643 -W --posterior-integration-limits N,M
644 Integrate all genotype combinations in our posterior space
645 which include no more than N samples with their Mth best
646 data likelihood. default: 1,3.
647 -K --no-permute
648 Do not scale prior probability of genotype combination given allele
649 frequency by the number of permutations of included genotypes.
650 -N --exclude-unobserved-genotypes
651 Skip sample genotypings for which the sample has no supporting reads.
652 -S --genotype-variant-threshold N
653 Limit posterior integration to samples where the second-best
654 genotype likelihood is no more than log(N) from the highest
655 genotype likelihood for the sample. default: ~unbounded
656 -j --use-mapping-quality
657 Use mapping quality of alleles when calculating data likelihoods.
658 -D --read-dependence-factor N
659 Incorporate non-independence of reads by scaling successive
660 observations by this factor during data likelihood
661 calculations. default: 0.9
662 -= --no-marginals
663 Do not calculate the marginal probability of genotypes. Saves
664 time and improves scaling performance in large populations.
665
666
667 ------
668
669 **Citation**
670
671 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing &lt;http://arxiv.org/abs/1207.3907&gt;`_.
672
673 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
674
675 </help>
676 </tool>