1
|
1 <?xml version="1.0"?>
|
|
2 <tool id="fetch_obo_ontology2" name="FetchOboOntology2" version="0.0.8">
|
|
3 <requirements>
|
|
4 <requirement type="package" version="0.9.6_9608597d12e127c847ae03aa03440ab63992fedf">freebayes</requirement>
|
|
5 <requirement type="package" version="0.1.18">samtools</requirement>
|
|
6 </requirements>
|
|
7 <description> - obo scripts</description>
|
|
8 <command>
|
|
9 ##set up input files
|
|
10 #set $reference_fasta_filename = "localref.fa"
|
|
11 #if str( $reference_source.reference_source_selector ) == "history":
|
|
12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
|
|
13 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 &&
|
|
14 #else:
|
|
15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
|
|
16 #end if
|
|
17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
|
|
18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
|
|
19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
|
|
20 #end for
|
|
21 ##finished setting up inputs
|
|
22
|
|
23 ##start FreeBayes commandline
|
|
24 freebayes
|
|
25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
|
|
26 --bam "localbam_${bam_count}.bam"
|
|
27 #end for
|
|
28 --fasta-reference "${reference_fasta_filename}"
|
|
29
|
|
30 ##outputs
|
|
31 --vcf "${output_vcf}"
|
|
32
|
|
33 ##advanced options
|
|
34 #if str( $options_type.options_type_selector ) == "advanced":
|
|
35 ##additional outputs
|
|
36 #if $options_type.output_trace_option:
|
|
37 --trace "${output_trace}"
|
|
38 #end if
|
|
39 #if $options_type.output_failed_alleles_option:
|
|
40 --failed-alleles "${output_failed_alleles_bed}"
|
|
41 #end if
|
|
42
|
|
43 ##additional inputs
|
|
44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
|
|
45 --targets "${options_type.target_limit_type.input_target_bed}"
|
|
46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
|
|
47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
|
|
48 #end if
|
|
49 #if $options_type.input_sample_file:
|
|
50 --samples "${options_type.input_sample_file}"
|
|
51 #end if
|
|
52 #if $options_type.input_populations_file:
|
|
53 --populations "${options_type.input_populations_file}"
|
|
54 #end if
|
|
55 #if $options_type.input_cnv_map_bed:
|
|
56 --cnv-map "${options_type.input_cnv_map_bed}"
|
|
57 #end if
|
|
58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
|
|
59 --variant-input "${options_type.input_variant_type.input_variant_vcf}"
|
|
60 ${options_type.input_variant_type.only_use_input_alleles}
|
|
61 #end if
|
|
62 #if $options_type.haplotype_basis_alleles:
|
|
63 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}"
|
|
64 #end if
|
|
65
|
|
66
|
|
67 ##reporting
|
|
68 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
|
|
69 --pvar "${options_type.section_reporting_type.pvar}"
|
|
70 ${options_type.section_reporting_type.show_reference_repeats}
|
|
71 #end if
|
|
72
|
|
73 ##population model
|
|
74 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
|
|
75 --theta "${options_type.section_population_model_type.theta}"
|
|
76 --ploidy "${options_type.section_population_model_type.ploidy}"
|
|
77 ${options_type.section_population_model_type.pooled}
|
|
78 #end if
|
|
79
|
|
80 ##reference allele
|
|
81 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
|
|
82 --use-reference-allele
|
|
83 ${options_type.use_reference_allele_type.diploid_reference}
|
|
84 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
|
|
85 #end if
|
|
86
|
|
87 ##allele scope
|
|
88 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
|
|
89 ${options_type.section_allele_scope_type.no_snps}
|
|
90 ${options_type.section_allele_scope_type.no_indels}
|
|
91 ${options_type.section_allele_scope_type.no_mnps}
|
|
92 ${options_type.section_allele_scope_type.no_complex}
|
|
93 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
|
|
94 #if $options_type.section_allele_scope_type.max_complex_gap:
|
|
95 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
|
|
96 #end if
|
|
97 #end if
|
|
98
|
|
99 ##indel realignment
|
|
100 ${options_type.left_align_indels}
|
|
101
|
|
102 ##input filters
|
|
103 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
|
|
104 ${options_type.section_input_filters_type.use_duplicate_reads}
|
|
105 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters":
|
|
106 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}"
|
|
107 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}"
|
|
108 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}"
|
|
109 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters":
|
|
110 --standard-filters
|
|
111 #end if
|
|
112 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
|
|
113 #if $options_type.section_input_filters_type.read_mismatch_limit:
|
|
114 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
|
|
115 #end if
|
|
116 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
|
|
117 #if $options_type.section_input_filters_type.read_snp_limit:
|
|
118 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
|
|
119 #end if
|
|
120 #if $options_type.section_input_filters_type.read_indel_limit:
|
|
121 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
|
|
122 #end if
|
|
123 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
|
|
124 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
|
|
125 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
|
|
126 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
|
|
127 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
|
|
128 --min-coverage "${options_type.section_input_filters_type.min_coverage}"
|
|
129 #end if
|
|
130
|
|
131 ##bayesian priors
|
|
132 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
|
|
133 ${options_type.section_bayesian_priors_type.no_ewens_priors}
|
|
134 ${options_type.section_bayesian_priors_type.no_population_priors}
|
|
135 ${options_type.section_bayesian_priors_type.hwe_priors}
|
|
136 #end if
|
|
137
|
|
138 ##observation prior expectations
|
|
139 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
|
|
140 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
|
|
141 ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
|
|
142 #end if
|
|
143
|
|
144 ##algorithmic features
|
|
145 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
|
|
146 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
|
|
147 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
|
|
148 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
|
|
149 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
|
|
150 ${options_type.section_algorithmic_features_type.no_permute}
|
|
151 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
|
|
152 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold:
|
|
153 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
|
|
154 #end if
|
|
155 ${options_type.section_algorithmic_features_type.use_mapping_quality}
|
|
156 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
|
|
157 ${options_type.section_algorithmic_features_type.no_marginals}
|
|
158 #end if
|
|
159
|
|
160 #end if
|
|
161 </command>
|
|
162 <inputs>
|
|
163 <conditional name="reference_source">
|
|
164 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
|
|
165 <option value="cached">Locally cached</option>
|
|
166 <option value="history">History</option>
|
|
167 </param>
|
|
168 <when value="cached">
|
|
169 <repeat name="input_bams" title="Sample BAM file" min="1">
|
|
170 <param name="input_bam" type="data" format="bam" label="BAM file">
|
|
171 <validator type="unspecified_build" />
|
|
172 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." />
|
|
173 </param>
|
|
174 </repeat>
|
|
175 <param name="ref_file" type="select" label="Using reference genome">
|
|
176 <options from_data_table="sam_fa_indexes">
|
|
177 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
|
|
178 </options>
|
|
179 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
|
|
180 </param>
|
|
181 </when>
|
|
182 <when value="history"> <!-- FIX ME!!!! -->
|
|
183 <repeat name="input_bams" title="Sample BAM file" min="1">
|
|
184 <param name="input_bam" type="data" format="bam" label="BAM file" />
|
|
185 </repeat>
|
|
186 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
|
|
187 </when>
|
|
188 </conditional>
|
|
189
|
|
190 <conditional name="options_type">
|
|
191 <param name="options_type_selector" type="select" label="Basic or Advanced options">
|
|
192 <option value="basic" selected="True">Basic</option>
|
|
193 <option value="advanced">Advanced</option>
|
|
194 </param>
|
|
195 <when value="basic">
|
|
196 <!-- Do nothing here -->
|
|
197 </when>
|
|
198 <when value="advanced">
|
|
199
|
|
200 <!-- output -->
|
|
201 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" />
|
|
202 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" />
|
|
203
|
|
204
|
|
205 <!-- input -->
|
|
206 <conditional name="target_limit_type">
|
|
207 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets">
|
|
208 <option value="do_not_limit" selected="True">Do not limit</option>
|
|
209 <option value="limit_by_target_file">Limit by target file</option>
|
|
210 <option value="limit_by_region">Limit to region</option>
|
|
211 </param>
|
|
212 <when value="do_not_limit">
|
|
213 <!-- Do nothing here -->
|
|
214 </when>
|
|
215 <when value="limit_by_target_file">
|
|
216 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." />
|
|
217 </when>
|
|
218 <when value="limit_by_region">
|
|
219 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? -->
|
|
220 <param name="region_start" type="integer" label="Region Start" value="" />
|
|
221 <param name="region_end" type="integer" label="Region End" value="" />
|
|
222 </when>
|
|
223 </conditional>
|
|
224 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" />
|
|
225 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" />
|
|
226 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" />
|
|
227 <conditional name="input_variant_type">
|
|
228 <param name="input_variant_type_selector" type="select" label="Provide variants file">
|
|
229 <option value="do_not_provide" selected="True">Do not provide</option>
|
|
230 <option value="provide_vcf">Provide VCF file</option>
|
|
231 </param>
|
|
232 <when value="do_not_provide">
|
|
233 <!-- Do nothing here -->
|
|
234 </when>
|
|
235 <when value="provide_vcf">
|
|
236 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" />
|
|
237 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
|
|
238 </when>
|
|
239 </conditional>
|
|
240 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" />
|
|
241
|
|
242 <!-- reporting -->
|
|
243 <conditional name="section_reporting_type">
|
|
244 <param name="section_reporting_type_selector" type="select" label="Set Reporting options">
|
|
245 <option value="do_not_set" selected="True">Do not set</option>
|
|
246 <option value="set">Set</option>
|
|
247 </param>
|
|
248 <when value="do_not_set">
|
|
249 <!-- do nothing here -->
|
|
250 </when>
|
|
251 <when value="set">
|
|
252 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" />
|
|
253 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
|
|
254 </when>
|
|
255 </conditional>
|
|
256
|
|
257
|
|
258 <!-- population model -->
|
|
259 <conditional name="section_population_model_type">
|
|
260 <param name="section_population_model_type_selector" type="select" label="Set population model options">
|
|
261 <option value="do_not_set" selected="True">Do not set</option>
|
|
262 <option value="set">Set</option>
|
|
263 </param>
|
|
264 <when value="do_not_set">
|
|
265 <!-- do nothing here -->
|
|
266 </when>
|
|
267 <when value="set">
|
|
268 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
|
|
269 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
|
|
270 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
|
|
271 </when>
|
|
272 </conditional>
|
|
273
|
|
274 <!-- reference allele -->
|
|
275 <conditional name="use_reference_allele_type">
|
|
276 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
|
|
277 <option value="do_not_include_reference_allele" selected="True">Do not include</option>
|
|
278 <option value="include_reference_allele">Include</option>
|
|
279 </param>
|
|
280 <when value="do_not_include_reference_allele">
|
|
281 <!-- Do nothing here -->
|
|
282 </when>
|
|
283 <when value="include_reference_allele">
|
|
284 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
|
|
285 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
|
|
286 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
|
|
287 </when>
|
|
288 </conditional>
|
|
289
|
|
290 <!-- allele scope -->
|
|
291 <conditional name="section_allele_scope_type">
|
|
292 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
|
|
293 <option value="do_not_set" selected="True">Do not set</option>
|
|
294 <option value="set">Set</option>
|
|
295 </param>
|
|
296 <when value="do_not_set">
|
|
297 <!-- do nothing here -->
|
|
298 </when>
|
|
299 <when value="set">
|
|
300 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
|
|
301 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
|
|
302 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" /> <!-- Expands to the no-mnps flag when checked, empty string otherwise. -->
|
|
303 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
|
|
304 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
|
|
305 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
|
|
306 </when>
|
|
307 </conditional>
|
|
308
|
|
309 <!-- indel realignment -->
|
|
310 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />
|
|
311
|
|
312 <!-- input filters -->
|
|
313 <conditional name="section_input_filters_type">
|
|
314 <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
|
|
315 <option value="do_not_set" selected="True">Do not set</option>
|
|
316 <option value="set">Set</option>
|
|
317 </param>
|
|
318 <when value="do_not_set">
|
|
319 <!-- do nothing here -->
|
|
320 </when>
|
|
321 <when value="set">
|
|
322 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
|
|
323 <conditional name="quality_filter_type">
|
|
324 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters">
|
|
325 <option value="standard_filters" selected="True">Apply standard</option>
|
|
326 <option value="apply_filters">Apply specified</option>
|
|
327 </param>
|
|
328 <when value="standard_filters">
|
|
329 <!-- Do nothing here --> <!-- standard-filters -->
|
|
330 </when>
|
|
331 <when value="apply_filters">
|
|
332 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" />
|
|
333 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="0" />
|
|
334 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
|
|
335 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
|
|
336 </when>
|
|
337 </conditional>
|
|
338 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is >=" value="10" />
|
|
339 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="" optional="True" />
|
|
340 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="1.0" />
|
|
341 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold" value="" optional="True" />
|
|
342 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
|
|
343 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
|
|
344 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> <!-- Passed to the min-alternate-fraction option in the command template. -->
|
|
345 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
|
|
346 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
|
|
347 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
|
|
348 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
|
|
349 </when>
|
|
350 </conditional>
|
|
351
|
|
352
|
|
353 <!-- bayesian priors -->
|
|
354 <conditional name="section_bayesian_priors_type">
|
|
355 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
|
|
356 <option value="do_not_set" selected="True">Do not set</option>
|
|
357 <option value="set">Set</option>
|
|
358 </param>
|
|
359 <when value="do_not_set">
|
|
360 <!-- do nothing here -->
|
|
361 </when>
|
|
362 <when value="set">
|
|
363 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
|
|
364 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
|
|
365 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
|
|
366 </when>
|
|
367 </conditional>
|
|
368
|
|
369 <!-- observation prior expectations -->
|
|
370 <conditional name="section_observation_prior_expectations_type">
|
|
371 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
|
|
372 <option value="do_not_set" selected="True">Do not set</option>
|
|
373 <option value="set">Set</option>
|
|
374 </param>
|
|
375 <when value="do_not_set">
|
|
376 <!-- do nothing here -->
|
|
377 </when>
|
|
378 <when value="set">
|
|
379 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors, Uses read placement probability, strand balance probability, and read position (5'-3') probability" /> <!-- Expands to the binomial-obs-priors flag when checked, empty string otherwise. -->
|
|
380 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
|
|
381 </when>
|
|
382 </conditional>
|
|
383
|
|
384
|
|
385 <!-- algorithmic features -->
|
|
386 <conditional name="section_algorithmic_features_type">
|
|
387 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
|
|
388 <option value="do_not_set" selected="True">Do not set</option>
|
|
389 <option value="set">Set</option>
|
|
390 </param>
|
|
391 <when value="do_not_set">
|
|
392 <!-- do nothing here -->
|
|
393 </when>
|
|
394 <when value="set">
|
|
395 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" /> <!-- Passed to the site-selection-max-iterations option in the command template. -->
|
|
396 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
|
|
397 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
|
|
398 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" /> <!-- N half of the posterior-integration-limits pair; the command template joins N and M with a comma. -->
|
|
399 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" /> <!-- M half of the posterior-integration-limits pair; the command template joins N and M with a comma. -->
|
|
400 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
|
|
401 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
|
|
402 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
|
|
403 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
|
|
404 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
|
|
405 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
|
|
406 </when>
|
|
407 </conditional>
|
|
408
|
|
409
|
|
410 </when>
|
|
411 </conditional>
|
|
412
|
|
413 </inputs>
|
|
414 <outputs> <!-- One unconditional VCF dataset plus two optional datasets gated by checkboxes in the advanced options. -->
|
|
415 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> <!-- Has no filter, so it is always declared; the command template passes it to the vcf option. -->
|
|
416 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> <!-- Target of the failed-alleles option in the command template. -->
|
|
417 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter> <!-- Declared only when the advanced branch is selected and output_failed_alleles_option is checked. -->
|
|
418 </data>
|
|
419 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> <!-- Target of the trace option in the command template. -->
|
|
420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter> <!-- Declared only when the advanced branch is selected and output_trace_option is checked. -->
|
|
421 </data>
|
|
422 </outputs>
|
|
423 <tests> <!-- A single functional test: history-supplied reference, one BAM, basic options. -->
|
|
424 <test>
|
|
425 <param name="reference_source_selector" value="history" /> <!-- Selects the "history" branch of the reference_source conditional. -->
|
|
426 <param name="ref_file" ftype="fasta" value="phiX.fasta"/> <!-- Reference FASTA supplied as a dataset rather than a cached index. -->
|
|
427 <param name="input_bam" ftype="bam" value="fake_phiX_reads_1.bam"/> <!-- Single entry for the input_bams repeat. -->
|
|
428 <param name="options_type_selector" value="basic"/> <!-- Basic branch: none of the advanced command-line options are emitted. -->
|
|
429 <output name="output_vcf" file="freebayes_out_1.vcf.contains" compare="contains"/> <!-- NOTE(review): compare="contains" presumably checks that the expected file's content appears in the produced VCF rather than requiring an exact match; confirm against the Galaxy test documentation. -->
|
|
430 </test>
|
|
431 </tests>
|
|
432 <help>
|
|
433 **What it does**
|
|
434
|
|
435 This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file.
|
|
436
|
|
437 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners.
|
|
438
|
|
439 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development.
|
|
440
|
|
441 Go `here <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_ for details on FreeBayes.
|
|
442
|
|
443 ------
|
|
444
|
|
445 **Inputs**
|
|
446
|
|
447 FreeBayes accepts an input aligned BAM file.
|
|
448
|
|
449
|
|
450 **Outputs**
|
|
451
|
|
452 The output is in the VCF format.
|
|
453
|
|
454 -------
|
|
455
|
|
456 **Settings**::
|
|
457
|
|
458 input and output:
|
|
459
|
|
460 -b --bam FILE Add FILE to the set of BAM files to be analyzed.
|
|
461 -c --stdin Read BAM input on stdin.
|
|
462 -v --vcf FILE Output VCF-format results to FILE.
|
|
463 -f --fasta-reference FILE
|
|
464 Use FILE as the reference sequence for analysis.
|
|
465 An index file (FILE.fai) will be created if none exists.
|
|
466 If neither --targets nor --region are specified, FreeBayes
|
|
467 will analyze every position in this reference.
|
|
468 -t --targets FILE
|
|
469 Limit analysis to targets listed in the BED-format FILE.
|
|
470 -r --region <chrom>:<start_position>..<end_position>
|
|
471 Limit analysis to the specified region, 0-base coordinates,
|
|
472 end_position not included (same as BED format).
|
|
473 -s --samples FILE
|
|
474 Limit analysis to samples listed (one per line) in the FILE.
|
|
475 By default FreeBayes will analyze all samples in its input
|
|
476 BAM files.
|
|
477 --populations FILE
|
|
478 Each line of FILE should list a sample and a population which
|
|
479 it is part of. The population-based bayesian inference model
|
|
480 will then be partitioned on the basis of the populations.
|
|
481 -A --cnv-map FILE
|
|
482 Read a copy number map from the BED file FILE, which has
|
|
483 the format:
|
|
484 reference sequence, start, end, sample name, copy number
|
|
485 ... for each region in each sample which does not have the
|
|
486 default copy number as set by --ploidy.
|
|
487 -L --trace FILE Output an algorithmic trace to FILE.
|
|
488 --failed-alleles FILE
|
|
489 Write a BED file of the analyzed positions which do not
|
|
490 pass --pvar to FILE.
|
|
491 -@ --variant-input VCF
|
|
492 Use variants reported in VCF file as input to the algorithm.
|
|
493 A report will be generated for every record in the VCF file.
|
|
494 -l --only-use-input-alleles
|
|
495 Only provide variant calls and genotype likelihoods for sites
|
|
496 and alleles which are provided in the VCF input, and provide
|
|
497 output in the VCF for all input alleles, not just those which
|
|
498 have support in the data.
|
|
499 --haplotype-basis-alleles VCF
|
|
500 When specified, only variant alleles provided in this input
|
|
501 VCF will be used for the construction of complex or haplotype
|
|
502 alleles.
|
|
503
|
|
504 reporting:
|
|
505
|
|
506 -P --pvar N Report sites if the probability that there is a polymorphism
|
|
507 at the site is greater than N. default: 0.0001
|
|
508 -_ --show-reference-repeats
|
|
509 Calculate and show information about reference repeats in
|
|
510 the VCF output.
|
|
511
|
|
512 population model:
|
|
513
|
|
514 -T --theta N The expected mutation rate or pairwise nucleotide diversity
|
|
515 among the population under analysis. This serves as the
|
|
516 single parameter to the Ewens Sampling Formula prior model
|
|
517 default: 0.001
|
|
518 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
|
|
519 -J --pooled Assume that samples result from pooled sequencing.
|
|
520 When using this flag, set --ploidy to the number of
|
|
521 alleles in each sample.
|
|
522
|
|
523 reference allele:
|
|
524
|
|
525 -Z --use-reference-allele
|
|
526 This flag includes the reference allele in the analysis as
|
|
527 if it is another sample from the same population.
|
|
528 -H --diploid-reference
|
|
529 If using the reference sequence as a sample (-Z),
|
|
530 treat it as diploid. default: false (reference is haploid)
|
|
531 --reference-quality MQ,BQ
|
|
532 Assign mapping quality of MQ to the reference allele at each
|
|
533 site and base quality of BQ. default: 100,60
|
|
534
|
|
535 allele scope:
|
|
536
|
|
537 -I --no-snps Ignore SNP alleles.
|
|
538 -i --no-indels Ignore insertion and deletion alleles.
|
|
539 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
|
|
540 -u --no-complex Ignore complex events (composites of other classes).
|
|
541 -n --use-best-n-alleles N
|
|
542 Evaluate only the best N SNP alleles, ranked by sum of
|
|
543 supporting quality scores. (Set to 0 to use all; default: all)
|
|
544 -E --max-complex-gap N
|
|
545 Allow complex alleles with contiguous embedded matches of up
|
|
546 to this length.
|
|
547
|
|
548 indel realignment:
|
|
549
|
|
550 -O --left-align-indels
|
|
551 Left-realign and merge gaps embedded in reads. default: false
|
|
552
|
|
553 input filters:
|
|
554
|
|
555 -4 --use-duplicate-reads
|
|
556 Include duplicate-marked alignments in the analysis.
|
|
557 default: exclude duplicates
|
|
558 -m --min-mapping-quality Q
|
|
559 Exclude alignments from analysis if they have a mapping
|
|
560 quality less than Q. default: 30
|
|
561 -q --min-base-quality Q
|
|
562 Exclude alleles from analysis if their supporting base
|
|
563 quality is less than Q. default: 20
|
|
564 -R --min-supporting-quality MQ,BQ
|
|
565 In order to consider an alternate allele, at least one supporting
|
|
566 alignment must have mapping quality MQ, and one supporting
|
|
567 allele must have base quality BQ. default: 0,0, unset
|
|
568 -Q --mismatch-base-quality-threshold Q
|
|
569 Count mismatches toward --read-mismatch-limit if the base
|
|
570 quality of the mismatch is >= Q. default: 10
|
|
571 -U --read-mismatch-limit N
|
|
572 Exclude reads with more than N mismatches where each mismatch
|
|
573 has base quality >= mismatch-base-quality-threshold.
|
|
574 default: ~unbounded
|
|
575 -z --read-max-mismatch-fraction N
|
|
576 Exclude reads with more than N [0,1] fraction of mismatches where
|
|
577 each mismatch has base quality >= mismatch-base-quality-threshold
|
|
578 default: 1.0
|
|
579 -$ --read-snp-limit N
|
|
580 Exclude reads with more than N base mismatches, ignoring gaps
|
|
581 with quality >= mismatch-base-quality-threshold.
|
|
582 default: ~unbounded
|
|
583 -e --read-indel-limit N
|
|
584 Exclude reads with more than N separate gaps.
|
|
585 default: ~unbounded
|
|
586 -0 --standard-filters Use stringent input base and mapping quality filters
|
|
587 Equivalent to -m 30 -q 20 -R 0 -S 0
|
|
588 -x --indel-exclusion-window
|
|
589 Ignore portions of alignments this many bases from a
|
|
590 putative insertion or deletion allele. default: 0
|
|
591 -F --min-alternate-fraction N
|
|
592 Require at least this fraction of observations supporting
|
|
593 an alternate allele within a single individual
|
|
594 in order to evaluate the position. default: 0.0
|
|
595 -C --min-alternate-count N
|
|
596 Require at least this count of observations supporting
|
|
597 an alternate allele within a single individual in order
|
|
598 to evaluate the position. default: 1
|
|
599 -3 --min-alternate-qsum N
|
|
600 Require at least this sum of quality of observations supporting
|
|
601 an alternate allele within a single individual in order
|
|
602 to evaluate the position. default: 0
|
|
603 -G --min-alternate-total N
|
|
604 Require at least this count of observations supporting
|
|
605 an alternate allele within the total population in order
|
|
606 to use the allele in analysis. default: 1
|
|
607 -! --min-coverage N
|
|
608 Require at least this coverage to process a site. default: 0
|
|
609
|
|
610 bayesian priors:
|
|
611
|
|
612 -Y --no-ewens-priors
|
|
613 Turns off the Ewens' Sampling Formula component of the priors.
|
|
614 -k --no-population-priors
|
|
615 Equivalent to --pooled --no-ewens-priors
|
|
616 -w --hwe-priors Use the probability of the combination arising under HWE given
|
|
617 the allele frequency as estimated by observation frequency.
|
|
618
|
|
619 observation prior expectations:
|
|
620
|
|
621 -V --binomial-obs-priors
|
|
622 Incorporate expectations about observations into the priors,
|
|
623 Uses read placement probability, strand balance probability,
|
|
624 and read position (5'-3') probability.
|
|
625 -a --allele-balance-priors
|
|
626 Use aggregate probability of observation balance between alleles
|
|
627 as a component of the priors. Best for observations with minimal
|
|
628 inherent reference bias.
|
|
629
|
|
630 algorithmic features:
|
|
631
|
|
632 -M --site-selection-max-iterations N
|
|
633 Uses hill-climbing algorithm to search posterior space for N
|
|
634 iterations to determine if the site should be evaluated. Set to 0
|
|
635 to prevent use of this algorithm for site selection, and
|
|
636 to a low integer for improved site selection at a slight
|
|
637 performance penalty. default: 5.
|
|
638 -B --genotyping-max-iterations N
|
|
639 Iterate no more than N times during genotyping step. default: 25.
|
|
640 --genotyping-max-banddepth N
|
|
641 Integrate no deeper than the Nth best genotype by likelihood when
|
|
642 genotyping. default: 6.
|
|
643 -W --posterior-integration-limits N,M
|
|
644 Integrate all genotype combinations in our posterior space
|
|
645 which include no more than N samples with their Mth best
|
|
646 data likelihood. default: 1,3.
|
|
647 -K --no-permute
|
|
648 Do not scale prior probability of genotype combination given allele
|
|
649 frequency by the number of permutations of included genotypes.
|
|
650 -N --exclude-unobserved-genotypes
|
|
651 Skip sample genotypings for which the sample has no supporting reads.
|
|
652 -S --genotype-variant-threshold N
|
|
653 Limit posterior integration to samples where the second-best
|
|
654 genotype likelihood is no more than log(N) from the highest
|
|
655 genotype likelihood for the sample. default: ~unbounded
|
|
656 -j --use-mapping-quality
|
|
657 Use mapping quality of alleles when calculating data likelihoods.
|
|
658 -D --read-dependence-factor N
|
|
659 Incorporate non-independence of reads by scaling successive
|
|
660 observations by this factor during data likelihood
|
|
661 calculations. default: 0.9
|
|
662 -= --no-marginals
|
|
663 Do not calculate the marginal probability of genotypes. Saves
|
|
664 time and improves scaling performance in large populations.
|
|
665
|
|
666
|
|
667 ------
|
|
668
|
|
669 **Citation**
|
|
670
|
|
671 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_.
|
|
672
|
|
673 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
|
|
674
|
|
675 </help>
|
|
676 </tool>
|