Mercurial > repos > jorrit > obotest
comparison obo_scripts.xml @ 1:569830319099 draft
adding test files
author | jorrit <jhpoelen@xs4all.nl> |
---|---|
date | Thu, 07 Feb 2013 13:32:43 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:0108dcc237ea | 1:569830319099 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="fetch_obo_ontology2" name="FetchOboOntology2" version="0.0.8"> | |
3 <requirements> <!-- External packages the command template calls: freebayes and samtools --> | |
4 <requirement version="0.9.6_9608597d12e127c847ae03aa03440ab63992fedf" type="package">freebayes</requirement> | |
5 <requirement version="0.1.18" type="package">samtools</requirement> | |
6 </requirements> | |
7 <description> - obo scripts</description> | |
8 <command> | |
9 ##set up input files | |
10 #set $reference_fasta_filename = "localref.fa" | |
11 #if str( $reference_source.reference_source_selector ) == "history": | |
12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && | |
13 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 && | |
14 #else: | |
15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
16 #end if | |
17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): | |
18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" && | |
19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && | |
20 #end for | |
21 ##finished setting up inputs | |
22 | |
23 ##start FreeBayes commandline | |
24 freebayes | |
25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): | |
26 --bam "localbam_${bam_count}.bam" | |
27 #end for | |
28 --fasta-reference "${reference_fasta_filename}" | |
29 | |
30 ##outputs | |
31 --vcf "${output_vcf}" | |
32 | |
33 ##advanced options | |
34 #if str( $options_type.options_type_selector ) == "advanced": | |
35 ##additional outputs | |
36 #if $options_type.output_trace_option: | |
37 --trace "${output_trace}" | |
38 #end if | |
39 #if $options_type.output_failed_alleles_option: | |
40 --failed-alleles "${output_failed_alleles_bed}" | |
41 #end if | |
42 | |
43 ##additional inputs | |
44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file": | |
45 --targets "${options_type.target_limit_type.input_target_bed}" | |
46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region": | |
47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}" | |
48 #end if | |
49 #if $options_type.input_sample_file: | |
50 --samples "${options_type.input_sample_file}" | |
51 #end if | |
52 #if $options_type.input_populations_file: | |
53 --populations "${options_type.input_populations_file}" | |
54 #end if | |
55 #if $options_type.input_cnv_map_bed: | |
56 --cnv-map "${options_type.input_cnv_map_bed}" | |
57 #end if | |
58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf": | |
59 --variant-input "${options_type.input_variant_type.input_variant_vcf}" | |
60 ${options_type.input_variant_type.only_use_input_alleles} | |
61 #end if | |
62 #if $options_type.haplotype_basis_alleles: | |
63 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}" | |
64 #end if | |
65 | |
66 | |
67 ##reporting | |
68 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set": | |
69 --pvar "${options_type.section_reporting_type.pvar}" | |
70 ${options_type.section_reporting_type.show_reference_repeats} | |
71 #end if | |
72 | |
73 ##population model | |
74 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set": | |
75 --theta "${options_type.section_population_model_type.theta}" | |
76 --ploidy "${options_type.section_population_model_type.ploidy}" | |
77 ${options_type.section_population_model_type.pooled} | |
78 #end if | |
79 | |
80 ##reference allele | |
81 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele": | |
82 --use-reference-allele | |
83 ${options_type.use_reference_allele_type.diploid_reference} | |
84 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}" | |
85 #end if | |
86 | |
87 ##allele scope | |
88 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set": | |
89 ${options_type.section_allele_scope_type.no_snps} | |
90 ${options_type.section_allele_scope_type.no_indels} | |
91 ${options_type.section_allele_scope_type.no_mnps} | |
92 ${options_type.section_allele_scope_type.no_complex} | |
93 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}" | |
94 #if $options_type.section_allele_scope_type.max_complex_gap: | |
95 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}" | |
96 #end if | |
97 #end if | |
98 | |
99 ##indel realignment | |
100 ${options_type.left_align_indels} | |
101 | |
102 ##input filters | |
103 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set": | |
104 ${options_type.section_input_filters_type.use_duplicate_reads} | |
105 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters": | |
106 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}" | |
107 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}" | |
108 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}" | |
109 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters": | |
110 --standard-filters | |
111 #end if | |
112 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}" | |
113 #if $options_type.section_input_filters_type.read_mismatch_limit: | |
114 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}" | |
115 #end if | |
116 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}" | |
117 #if $options_type.section_input_filters_type.read_snp_limit: | |
118 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}" | |
119 #end if | |
120 #if $options_type.section_input_filters_type.read_indel_limit: | |
121 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}" | |
122 #end if | |
123 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}" | |
124 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}" | |
125 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}" | |
126 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}" | |
127 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}" | |
128 --min-coverage "${options_type.section_input_filters_type.min_coverage}" | |
129 #end if | |
130 | |
131 ##bayesian priors | |
132 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set": | |
133 ${options_type.section_bayesian_priors_type.no_ewens_priors} | |
134 ${options_type.section_bayesian_priors_type.no_population_priors} | |
135 ${options_type.section_bayesian_priors_type.hwe_priors} | |
136 #end if | |
137 | |
138 ##observation prior expectations | |
139 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set": | |
140 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors} | |
141 ${options_type.section_observation_prior_expectations_type.allele_balance_priors} | |
142 #end if | |
143 | |
144 ##algorithmic features | |
145 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set": | |
146 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}" | |
147 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}" | |
148 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}" | |
149 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}" | |
150 ${options_type.section_algorithmic_features_type.no_permute} | |
151 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes} | |
152 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold: | |
153 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}" | |
154 #end if | |
155 ${options_type.section_algorithmic_features_type.use_mapping_quality} | |
156 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}" | |
157 ${options_type.section_algorithmic_features_type.no_marginals} | |
158 #end if | |
159 | |
160 #end if | |
161 </command> | |
162 <inputs> | |
163 <conditional name="reference_source"> | |
164 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | |
165 <option value="cached">Locally cached</option> | |
166 <option value="history">History</option> | |
167 </param> | |
168 <when value="cached"> | |
169 <repeat name="input_bams" title="Sample BAM file" min="1"> | |
170 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
171 <validator type="unspecified_build" /> | |
172 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> | |
173 </param> | |
174 </repeat> | |
175 <param name="ref_file" type="select" label="Using reference genome"> | |
176 <options from_data_table="sam_fa_indexes"> | |
177 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...--> | |
178 </options> | |
179 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
180 </param> | |
181 </when> | |
182 <when value="history"> <!-- FIX ME!!!! --> | |
183 <repeat name="input_bams" title="Sample BAM file" min="1"> | |
184 <param name="input_bam" type="data" format="bam" label="BAM file" /> | |
185 </repeat> | |
186 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
187 </when> | |
188 </conditional> | |
189 | |
190 <conditional name="options_type"> | |
191 <param name="options_type_selector" type="select" label="Basic or Advanced options"> | |
192 <option value="basic" selected="True">Basic</option> | |
193 <option value="advanced">Advanced</option> | |
194 </param> | |
195 <when value="basic"> | |
196 <!-- Do nothing here --> | |
197 </when> | |
198 <when value="advanced"> | |
199 | |
200 <!-- output --> | |
201 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" /> | |
202 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" /> | |
203 | |
204 | |
205 <!-- input --> | |
206 <conditional name="target_limit_type"> | |
207 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets"> | |
208 <option value="do_not_limit" selected="True">Do not limit</option> | |
209 <option value="limit_by_target_file">Limit by target file</option> | |
210 <option value="limit_by_region">Limit to region</option> | |
211 </param> | |
212 <when value="do_not_limit"> | |
213 <!-- Do nothing here --> | |
214 </when> | |
215 <when value="limit_by_target_file"> | |
216 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." /> | |
217 </when> | |
218 <when value="limit_by_region"> | |
219 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? --> | |
220 <param name="region_start" type="integer" label="Region Start" value="" /> | |
221 <param name="region_end" type="integer" label="Region End" value="" /> | |
222 </when> | |
223 </conditional> | |
224 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" /> | |
225 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" /> | |
226 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" /> | |
227 <conditional name="input_variant_type"> | |
228 <param name="input_variant_type_selector" type="select" label="Provide variants file"> | |
229 <option value="do_not_provide" selected="True">Do not provide</option> | |
230 <option value="provide_vcf">Provide VCF file</option> | |
231 </param> | |
232 <when value="do_not_provide"> | |
233 <!-- Do nothing here --> | |
234 </when> | |
235 <when value="provide_vcf"> | |
236 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" /> | |
237 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" /> | |
238 </when> | |
239 </conditional> | |
240 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" /> | |
241 | |
242 <!-- reporting --> | |
243 <conditional name="section_reporting_type"> | |
244 <param name="section_reporting_type_selector" type="select" label="Set Reporting options"> | |
245 <option value="do_not_set" selected="True">Do not set</option> | |
246 <option value="set">Set</option> | |
247 </param> | |
248 <when value="do_not_set"> | |
249 <!-- do nothing here --> | |
250 </when> | |
251 <when value="set"> | |
252 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" /> | |
253 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" /> | |
254 </when> | |
255 </conditional> | |
256 | |
257 | |
258 <!-- population model --> | |
259 <conditional name="section_population_model_type"> | |
260 <param name="section_population_model_type_selector" type="select" label="Set population model options"> | |
261 <option value="do_not_set" selected="True">Do not set</option> | |
262 <option value="set">Set</option> | |
263 </param> | |
264 <when value="do_not_set"> | |
265 <!-- do nothing here --> | |
266 </when> | |
267 <when value="set"> | |
268 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/> | |
269 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" /> | |
270 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." /> | |
271 </when> | |
272 </conditional> | |
273 | |
274 <!-- reference allele --> | |
275 <conditional name="use_reference_allele_type"> | |
276 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis"> | |
277 <option value="do_not_include_reference_allele" selected="True">Do not include</option> | |
278 <option value="include_reference_allele">Include</option> | |
279 </param> | |
280 <when value="do_not_include_reference_allele"> | |
281 <!-- Do nothing here --> | |
282 </when> | |
283 <when value="include_reference_allele"> | |
284 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" /> | |
285 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" /> | |
286 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" /> | |
287 </when> | |
288 </conditional> | |
289 | |
290 <!-- allele scope --> | |
291 <conditional name="section_allele_scope_type"> | |
292 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options"> | |
293 <option value="do_not_set" selected="True">Do not set</option> | |
294 <option value="set">Set</option> | |
295 </param> | |
296 <when value="do_not_set"> | |
297 <!-- No parameters: the command template adds no allele scope flags for this choice --> | |
298 </when> | |
299 <when value="set"> | |
300 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" /> | |
301 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" /> | |
302 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" /> | |
303 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" /> | |
304 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" /> | |
305 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/> | |
306 </when> | |
307 </conditional> | |
308 | |
309 <!-- indel realignment --> | |
310 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" /> | |
311 | |
312 <!-- input filters --> | |
313 <conditional name="section_input_filters_type"> | |
314 <param name="section_input_filters_type_selector" type="select" label="Set input filters options"> | |
315 <option value="do_not_set" selected="True">Do not set</option> | |
316 <option value="set">Set</option> | |
317 </param> | |
318 <when value="do_not_set"> | |
319 <!-- No parameters: the command template adds no input filter flags for this choice --> | |
320 </when> | |
321 <when value="set"> | |
322 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" /> | |
323 <conditional name="quality_filter_type"> | |
324 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters"> | |
325 <option value="standard_filters" selected="True">Apply standard</option> | |
326 <option value="apply_filters">Apply specified</option> | |
327 </param> | |
328 <when value="standard_filters"> | |
329 <!-- No parameters here: the command template passes the standard-filters flag for this choice --> | |
330 </when> | |
331 <when value="apply_filters"> | |
332 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" /> | |
333 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="0" /> | |
334 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" /> | |
335 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" /> | |
336 </when> | |
337 </conditional> | |
338 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is >=" value="10" /> | |
339 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="" optional="True" /> | |
340 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="1.0" /> | |
341 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold" value="" optional="True" /> | |
342 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" /> | |
343 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" /> | |
344 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> | |
345 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" /> | |
346 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> | |
347 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" /> | |
348 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" /> | |
349 </when> | |
350 </conditional> | |
351 | |
352 | |
353 <!-- bayesian priors --> | |
354 <conditional name="section_bayesian_priors_type"> | |
355 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options"> | |
356 <option value="do_not_set" selected="True">Do not set</option> | |
357 <option value="set">Set</option> | |
358 </param> | |
359 <when value="do_not_set"> | |
360 <!-- do nothing here --> | |
361 </when> | |
362 <when value="set"> | |
363 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" /> | |
364 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" /> | |
365 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" /> | |
366 </when> | |
367 </conditional> | |
368 | |
369 <!-- observation prior expectations --> | |
370 <conditional name="section_observation_prior_expectations_type"> | |
371 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options"> | |
372 <option value="do_not_set" selected="True">Do not set</option> | |
373 <option value="set">Set</option> | |
374 </param> | |
375 <when value="do_not_set"> | |
376 <!-- No parameters: the command template adds no observation prior flags for this choice --> | |
377 </when> | |
378 <when value="set"> | |
379 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" /> | |
380 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" /> | |
381 </when> | |
382 </conditional> | |
383 | |
384 | |
385 <!-- algorithmic features --> | |
386 <conditional name="section_algorithmic_features_type"> | |
387 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options"> | |
388 <option value="do_not_set" selected="True">Do not set</option> | |
389 <option value="set">Set</option> | |
390 </param> | |
391 <when value="do_not_set"> | |
392 <!-- No parameters: the command template adds no algorithmic feature flags for this choice --> | |
393 </when> | |
394 <when value="set"> | |
395 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" /> | |
396 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" /> | |
397 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" /> | |
398 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" /> | |
399 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" /> | |
400 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" /> | |
401 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" /> | |
402 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" /> | |
403 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" /> | |
404 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" /> | |
405 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" /> | |
406 </when> | |
407 </conditional> | |
408 | |
409 | |
410 </when> | |
411 </conditional> | |
412 | |
413 </inputs> | |
414 <outputs> <!-- The VCF has no filter; the failed-alleles BED and the trace file are each gated by a filter on their advanced option --> | |
415 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (variants)" /> | |
416 <data name="output_failed_alleles_bed" format="bed" label="${tool.name} on ${on_string} (failed alleles)"> | |
417 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter> | |
418 </data> | |
419 <data name="output_trace" format="txt" label="${tool.name} on ${on_string} (trace)"> | |
420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter> | |
421 </data> | |
422 </outputs> | |
423 <tests> | |
424 <test> <!-- Reference taken from the history, one BAM input, basic options; the VCF output is checked with compare="contains" --> | |
425 <param name="reference_source_selector" value="history" /> | |
426 <param name="ref_file" ftype="fasta" value="phiX.fasta"/> | |
427 <param name="input_bam" ftype="bam" value="fake_phiX_reads_1.bam"/> | |
428 <param name="options_type_selector" value="basic"/> | |
429 <output name="output_vcf" file="freebayes_out_1.vcf.contains" compare="contains"/> | |
430 </test> | |
431 </tests> | |
432 <help> | |
433 **What it does** | |
434 | |
435 This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file. | |
436 | |
437 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners. | |
438 | |
439 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development. | |
440 | |
441 Go `here <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_ for details on FreeBayes. | |
442 | |
443 ------ | |
444 | |
445 **Inputs** | |
446 | |
447 FreeBayes accepts an input aligned BAM file. | |
448 | |
449 | |
450 **Outputs** | |
451 | |
452 The output is in the VCF format. | |
453 | |
454 ------- | |
455 | |
456 **Settings**:: | |
457 | |
458 input and output: | |
459 | |
460 -b --bam FILE Add FILE to the set of BAM files to be analyzed. | |
461 -c --stdin Read BAM input on stdin. | |
462 -v --vcf FILE Output VCF-format results to FILE. | |
463 -f --fasta-reference FILE | |
464 Use FILE as the reference sequence for analysis. | |
465 An index file (FILE.fai) will be created if none exists. | |
466 If neither --targets nor --region are specified, FreeBayes | |
467 will analyze every position in this reference. | |
468 -t --targets FILE | |
469 Limit analysis to targets listed in the BED-format FILE. | |
470 -r --region <chrom>:<start_position>..<end_position> | |
471 Limit analysis to the specified region, 0-base coordinates, | |
472 end_position not included (same as BED format). | |
473 -s --samples FILE | |
474 Limit analysis to samples listed (one per line) in the FILE. | |
475 By default FreeBayes will analyze all samples in its input | |
476 BAM files. | |
477 --populations FILE | |
478 Each line of FILE should list a sample and a population which | |
479 it is part of. The population-based bayesian inference model | |
480 will then be partitioned on the basis of the populations. | |
481 -A --cnv-map FILE | |
482 Read a copy number map from the BED file FILE, which has | |
483 the format: | |
484 reference sequence, start, end, sample name, copy number | |
485 ... for each region in each sample which does not have the | |
486 default copy number as set by --ploidy. | |
487 -L --trace FILE Output an algorithmic trace to FILE. | |
488 --failed-alleles FILE | |
489 Write a BED file of the analyzed positions which do not | |
490 pass --pvar to FILE. | |
491 -@ --variant-input VCF | |
492 Use variants reported in VCF file as input to the algorithm. | |
493 A report will be generated for every record in the VCF file. | |
494 -l --only-use-input-alleles | |
495 Only provide variant calls and genotype likelihoods for sites | |
496 and alleles which are provided in the VCF input, and provide | |
497 output in the VCF for all input alleles, not just those which | |
498 have support in the data. | |
499 --haplotype-basis-alleles VCF | |
500 When specified, only variant alleles provided in this input | |
501 VCF will be used for the construction of complex or haplotype | |
502 alleles. | |
503 | |
504 reporting: | |
505 | |
506 -P --pvar N Report sites if the probability that there is a polymorphism | |
507 at the site is greater than N. default: 0.0001 | |
508 -_ --show-reference-repeats | |
509 Calculate and show information about reference repeats in | |
510 the VCF output. | |
511 | |
512 population model: | |
513 | |
514 -T --theta N The expected mutation rate or pairwise nucleotide diversity | |
515 among the population under analysis. This serves as the | |
516 single parameter to the Ewens Sampling Formula prior model | |
517 default: 0.001 | |
518 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2 | |
519 -J --pooled Assume that samples result from pooled sequencing. | |
520 When using this flag, set --ploidy to the number of | |
521 alleles in each sample. | |
522 | |
523 reference allele: | |
524 | |
525 -Z --use-reference-allele | |
526 This flag includes the reference allele in the analysis as | |
527 if it is another sample from the same population. | |
528 -H --diploid-reference | |
529 If using the reference sequence as a sample (-Z), | |
530 treat it as diploid. default: false (reference is haploid) | |
531 --reference-quality MQ,BQ | |
532 Assign mapping quality of MQ to the reference allele at each | |
533 site and base quality of BQ. default: 100,60 | |
534 | |
535 allele scope: | |
536 | |
537 -I --no-snps Ignore SNP alleles. | |
538 -i --no-indels Ignore insertion and deletion alleles. | |
539 | -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs. | |
540 -u --no-complex Ignore complex events (composites of other classes). | |
541 -n --use-best-n-alleles N | |
542 Evaluate only the best N SNP alleles, ranked by sum of | |
543 supporting quality scores. (Set to 0 to use all; default: all) | |
544 -E --max-complex-gap N | |
545 Allow complex alleles with contiguous embedded matches of up | |
546 to this length. | |
547 | |
548 indel realignment: | |
549 | |
550 -O --left-align-indels | |
551 Left-realign and merge gaps embedded in reads. default: false | |
552 | |
553 input filters: | |
554 | |
555 -4 --use-duplicate-reads | |
556 Include duplicate-marked alignments in the analysis. | |
557 default: exclude duplicates | |
558 -m --min-mapping-quality Q | |
559 Exclude alignments from analysis if they have a mapping | |
560 quality less than Q. default: 30 | |
561 -q --min-base-quality Q | |
562 Exclude alleles from analysis if their supporting base | |
563 quality is less than Q. default: 20 | |
564 -R --min-supporting-quality MQ,BQ | |
565 In order to consider an alternate allele, at least one supporting | |
566 alignment must have mapping quality MQ, and one supporting | |
567 allele must have base quality BQ. default: 0,0, unset | |
568 -Q --mismatch-base-quality-threshold Q | |
569 Count mismatches toward --read-mismatch-limit if the base | |
570 quality of the mismatch is >= Q. default: 10 | |
571 -U --read-mismatch-limit N | |
572 Exclude reads with more than N mismatches where each mismatch | |
573 has base quality >= mismatch-base-quality-threshold. | |
574 default: ~unbounded | |
575 -z --read-max-mismatch-fraction N | |
576 Exclude reads with more than N [0,1] fraction of mismatches where | |
577 each mismatch has base quality >= mismatch-base-quality-threshold | |
578 default: 1.0 | |
579 -$ --read-snp-limit N | |
580 Exclude reads with more than N base mismatches, ignoring gaps | |
581 with quality >= mismatch-base-quality-threshold. | |
582 default: ~unbounded | |
583 -e --read-indel-limit N | |
584 Exclude reads with more than N separate gaps. | |
585 default: ~unbounded | |
586 -0 --standard-filters Use stringent input base and mapping quality filters | |
587 Equivalent to -m 30 -q 20 -R 0 -S 0 | |
588 -x --indel-exclusion-window | |
589 Ignore portions of alignments this many bases from a | |
590 putative insertion or deletion allele. default: 0 | |
591 -F --min-alternate-fraction N | |
592 Require at least this fraction of observations supporting | |
593 | an alternate allele within a single individual | |
594 in order to evaluate the position. default: 0.0 | |
595 -C --min-alternate-count N | |
596 Require at least this count of observations supporting | |
597 an alternate allele within a single individual in order | |
598 to evaluate the position. default: 1 | |
599 -3 --min-alternate-qsum N | |
600 Require at least this sum of quality of observations supporting | |
601 an alternate allele within a single individual in order | |
602 to evaluate the position. default: 0 | |
603 -G --min-alternate-total N | |
604 Require at least this count of observations supporting | |
605 an alternate allele within the total population in order | |
606 to use the allele in analysis. default: 1 | |
607 -! --min-coverage N | |
608 Require at least this coverage to process a site. default: 0 | |
609 | |
610 bayesian priors: | |
611 | |
612 -Y --no-ewens-priors | |
613 Turns off the Ewens' Sampling Formula component of the priors. | |
614 -k --no-population-priors | |
615 Equivalent to --pooled --no-ewens-priors | |
616 -w --hwe-priors Use the probability of the combination arising under HWE given | |
617 the allele frequency as estimated by observation frequency. | |
618 | |
619 observation prior expectations: | |
620 | |
621 -V --binomial-obs-priors | |
622 | Incorporate expectations about observations into the priors. | |
623 Uses read placement probability, strand balance probability, | |
624 and read position (5'-3') probability. | |
625 -a --allele-balance-priors | |
626 Use aggregate probability of observation balance between alleles | |
627 as a component of the priors. Best for observations with minimal | |
628 inherent reference bias. | |
629 | |
630 algorithmic features: | |
631 | |
632 -M --site-selection-max-iterations N | |
633 Uses hill-climbing algorithm to search posterior space for N | |
634 iterations to determine if the site should be evaluated. Set to 0 | |
635 to prevent use of this algorithm for site selection, and | |
636 | to a low integer for improved site selection at a slight | |
637 performance penalty. default: 5. | |
638 -B --genotyping-max-iterations N | |
639 Iterate no more than N times during genotyping step. default: 25. | |
640 --genotyping-max-banddepth N | |
641 Integrate no deeper than the Nth best genotype by likelihood when | |
642 genotyping. default: 6. | |
643 -W --posterior-integration-limits N,M | |
644 Integrate all genotype combinations in our posterior space | |
645 which include no more than N samples with their Mth best | |
646 data likelihood. default: 1,3. | |
647 -K --no-permute | |
648 Do not scale prior probability of genotype combination given allele | |
649 frequency by the number of permutations of included genotypes. | |
650 -N --exclude-unobserved-genotypes | |
651 Skip sample genotypings for which the sample has no supporting reads. | |
652 -S --genotype-variant-threshold N | |
653 Limit posterior integration to samples where the second-best | |
654 genotype likelihood is no more than log(N) from the highest | |
655 genotype likelihood for the sample. default: ~unbounded | |
656 -j --use-mapping-quality | |
657 Use mapping quality of alleles when calculating data likelihoods. | |
658 -D --read-dependence-factor N | |
659 Incorporate non-independence of reads by scaling successive | |
660 observations by this factor during data likelihood | |
661 calculations. default: 0.9 | |
662 -= --no-marginals | |
663 Do not calculate the marginal probability of genotypes. Saves | |
664 time and improves scaling performance in large populations. | |
665 | |
666 | |
667 ------ | |
668 | |
669 **Citation** | |
670 | |
671 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_. | |
672 | |
673 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | |
674 | |
675 </help> | |
676 </tool> |