0
|
1 <tool id="freebayes" name="FreeBayes" version="@DEPENDENCY_VERSION@">
|
|
2 <description>bayesian genetic variant detector</description>
|
|
3 <macros>
|
|
4 <import>macros.xml</import>
|
|
5 </macros>
|
|
6 <expand macro="requirements">
|
|
7 <requirement type="package" version="4.1.3">gawk</requirement>
|
|
8 <requirement type="package" version="20170422">parallel</requirement>
|
|
9 </expand>
|
|
10 <command detect_errors="exit_code"><![CDATA[
|
|
11 ##set up input files
|
|
12
|
|
13 #set $reference_fasta_filename = "localref.fa"
|
|
14
|
|
15 #if str( $reference_source.reference_source_selector ) == "history":
|
|
16 ln -s -f '${reference_source.ref_file}' '${reference_fasta_filename}' &&
|
|
17 samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 &&
|
|
18 #else:
|
|
19 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
|
|
20 #end if
|
|
21
|
|
22 #if $reference_source.batchmode.processmode == 'merge':
|
|
23 #set $input_bamfiles = $reference_source.batchmode.input_bams
|
|
24 #else:
|
|
25 #set $input_bamfiles = [ $reference_source.batchmode.input_bams ]
|
|
26 #end if
|
|
27
|
|
28 #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
|
|
29 ln -s -f '${input_bam}' 'b_${bam_count}.bam' &&
|
|
30 ln -s -f '${input_bam.metadata.bam_index}' 'b_${bam_count}.bam.bai' &&
|
|
31 #end for
|
|
32
|
|
33 ## Tabixize optional input_variant_vcf file (for --variant-input option)
|
|
34 #if ( str( $options_type.options_type_selector ) == 'cline' or str( $options_type.options_type_selector ) == 'full' ) and str( $options_type.optional_inputs.optional_inputs_selector ) == 'set' and str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
|
|
35 ln -s -f '${options_type.optional_inputs.input_variant_type.input_variant_vcf}' input_variant_vcf.vcf.gz &&
|
|
36 ln -s -f '${Tabixized_input}' input_variant_vcf.vcf.gz.tbi &&
|
|
37 #end if
|
|
38
|
|
39 ##if the user has specified a region or target file, just use that instead of calculating a set of unique regions
|
|
40 #if str( $target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
|
|
41 ln -s '${target_limit_type.input_target_bed}' regions_all.bed &&
|
|
42 #elif str( $target_limit_type.target_limit_type_selector ) == "limit_by_region":
|
|
43 printf '${target_limit_type.region_chromosome}\t${target_limit_type.region_start}\t${target_limit_type.region_end}' > regions_all.bed &&
|
|
44 #else
|
|
45 ##divide up the regions in the bam file for efficient processing
|
|
46 #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
|
|
47 samtools view -H b_${bam_count}.bam |
|
|
48 grep '^@SQ' |
|
|
49 cut -f 2- |
|
|
50 awk '{ gsub("^SN:","",$1); gsub("^LN:","",$2); print $1"\t0\t"$2; }' >> regions_all.bed &&
|
|
51 #end for
|
|
52 #end if
|
|
53
|
|
54 sort -u regions_all.bed > regions_uniq.bed &&
|
|
55 ## split into even small chunks, this has some disadvantages and will not be used for the moment
|
|
56 ## bedtools makewindows -b regions_uniq.bed -w 10000000 -s 9990000 > regions.bed &&
|
|
57
|
|
58 mkdir vcf_output failed_alleles trace &&
|
|
59
|
|
60 ## Finished setting up inputs
|
|
61
|
|
62 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
|
|
63 do
|
|
64 echo "
|
|
65
|
|
66 ## COMMAND LINE STARTS HERE
|
|
67
|
|
68 freebayes
|
|
69
|
|
70 --region '\$i'
|
|
71
|
|
72 #for $bam_count, $input_bam in enumerate( $input_bamfiles ):
|
|
73 --bam 'b_${bam_count}.bam'
|
|
74 #end for
|
|
75 --fasta-reference '${reference_fasta_filename}'
|
|
76
|
|
77 ## Outputs
|
|
78 --vcf './vcf_output/part_\$i.vcf'
|
|
79
|
|
80 ## Coverage
|
|
81 #if str($coverage_options.coverage_options_selector) == "set":
|
|
82 @COVERAGE@
|
|
83 #end if
|
|
84
|
|
85 ##advanced options
|
|
86 #if str( $options_type.options_type_selector ) == "simple":
|
|
87 #pass
|
|
88 #elif str( $options_type.options_type_selector ) == "simple_w_filters":
|
|
89 --standard-filters
|
|
90 #elif str( $options_type.options_type_selector ) == "naive":
|
|
91 --haplotype-length 0
|
|
92 --min-alternate-count 1
|
|
93 --min-alternate-fraction 0.05
|
|
94 --pooled-continuous
|
|
95 --report-monomorphic
|
|
96 #elif str( $options_type.options_type_selector ) == "naive_w_filters":
|
|
97 --haplotype-length 0
|
|
98 --min-alternate-count 1
|
|
99 --min-alternate-fraction 0.05
|
|
100 --pooled-continuous
|
|
101 --report-monomorphic
|
|
102 --standard-filters
|
|
103 #elif str( $options_type.options_type_selector ) == "full":
|
|
104 #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set':
|
|
105 ${options_type.optional_inputs.report_monomorphic}
|
|
106
|
|
107 #if $options_type.optional_inputs.output_trace_option:
|
|
108 --trace ./trace/part_'\$i'.txt
|
|
109 #end if
|
|
110 #if $options_type.optional_inputs.output_failed_alleles_option:
|
|
111 --failed-alleles ./failed_alleles/part_'\$i'.bed
|
|
112 #end if
|
|
113 #if $options_type.optional_inputs.samples:
|
|
114 --samples '${options_type.optional_inputs.samples}'
|
|
115 #end if
|
|
116 #if $options_type.optional_inputs.populations:
|
|
117 --populations '${options_type.optional_inputs.populations}'
|
|
118 #end if
|
|
119 #if $options_type.optional_inputs.A:
|
|
120 --cnv-map '${options_type.optional_inputs.A}'
|
|
121 #end if
|
|
122 #if str( $options_type.optional_inputs.input_variant_type.input_variant_type_selector ) == "provide_vcf":
|
|
123 --variant-input 'input_variant_vcf.vcf.gz' ## input_variant_vcf.vcf.gz is symlinked to a galaxy-generated dataset in "Tabixize optional input_variant_vcf file" section of the command line above
|
|
124 ${options_type.optional_inputs.input_variant_type.only_use_input_alleles}
|
|
125 #end if
|
|
126 #if $options_type.optional_inputs.haplotype_basis_alleles:
|
|
127 --haplotype-basis-alleles '${options_type.optional_inputs.haplotype_basis_alleles}'
|
|
128 #end if
|
|
129 #if $options_type.optional_inputs.observation_bias:
|
|
130 --observation-bias '${options_type.optional_inputs.observation_bias}'
|
|
131 #end if
|
|
132 #if $options_type.optional_inputs.contamination_estimates:
|
|
133 --contamination-estimates '${options_type.optional_inputs.contamination_estimates}'
|
|
134 #end if
|
|
135 #end if
|
|
136
|
|
137 ## REPORTING
|
|
138 #if str( $options_type.reporting.reporting_selector ) == "set":
|
|
139 --pvar ${options_type.reporting.pvar}
|
|
140 #end if
|
|
141 ## POPULATION MODEL
|
|
142 #if str( $options_type.population_model.population_model_selector ) == "set":
|
|
143 --theta ${options_type.population_model.T}
|
|
144 --ploidy ${options_type.population_model.P}
|
|
145 ${options_type.population_model.J}
|
|
146 ${options_type.population_model.K}
|
|
147 #end if
|
|
148
|
|
149 ## REFERENCE ALLELE
|
|
150 #if str( $options_type.reference_allele.reference_allele_selector ) == "set":
|
|
151 ${options_type.reference_allele.Z}
|
|
152 --reference-quality '${options_type.reference_allele.reference_quality}'
|
|
153 #end if
|
|
154
|
|
155 ## ALLELE SCOPE
|
|
156 #if str( $options_type.allele_scope.allele_scope_selector ) == "set":
|
|
157 ${options_type.allele_scope.I}
|
|
158 ${options_type.allele_scope.i}
|
|
159 ${options_type.allele_scope.X}
|
|
160 ${options_type.allele_scope.u}
|
|
161 ${options_type.allele_scope.no_partial_observations}
|
|
162
|
|
163 -n ${options_type.allele_scope.n}
|
|
164
|
|
165 --haplotype-length ${options_type.allele_scope.haplotype_length}
|
|
166 --min-repeat-size ${options_type.allele_scope.min_repeat_length}
|
|
167 --min-repeat-entropy ${options_type.allele_scope.min_repeat_entropy}
|
|
168 #end if
|
|
169
|
|
170 ## REALIGNMENT
|
|
171 ${options_type.O}
|
|
172
|
|
173 ##INPUT FILTERS (coverage limits are emitted by the coverage_options section above, not here)
|
|
174 #if str( $options_type.input_filters.input_filters_selector ) == "set":
|
|
175 ${options_type.input_filters.use_duplicate_reads}
|
|
176 -m ${options_type.input_filters.m}
|
|
177 -q ${options_type.input_filters.q}
|
|
178 -R ${options_type.input_filters.R}
|
|
179 -Y ${options_type.input_filters.Y}
|
|
180 -e ${options_type.input_filters.e}
|
|
181 -F ${options_type.input_filters.F}
|
|
182 -C ${options_type.input_filters.C}
|
|
183 -G ${options_type.input_filters.G}
|
|
184
|
|
185 #if str( $options_type.input_filters.mismatch_filters.mismatch_filters_selector ) == "set":
|
|
186 -Q ${options_type.input_filters.mismatch_filters.Q}
|
|
187 #if str($options_type.input_filters.mismatch_filters.U)
|
|
188 -U ${options_type.input_filters.mismatch_filters.U}
|
|
189 #end if
|
|
190 -z ${options_type.input_filters.mismatch_filters.z}
|
|
191
|
|
192 --read-snp-limit ${options_type.input_filters.mismatch_filters.read_snp_limit}
|
|
193 #end if
|
|
194
|
|
195 ${options_type.input_filters.standard_filters} ## placed after -m/-q/-R; NOTE(review): presumably the later option wins in freebayes, confirm
|
|
196 --min-alternate-qsum ${options_type.input_filters.min_alternate_qsum}
|
|
197 #end if
|
|
198
|
|
199 ## POPULATION AND MAPPABILITY PRIORS
|
|
200 #if str( $options_type.population_mappability_priors.population_mappability_priors_selector ) == "set":
|
|
201 ${options_type.population_mappability_priors.k}
|
|
202 ${options_type.population_mappability_priors.w}
|
|
203 ${options_type.population_mappability_priors.V}
|
|
204 ${options_type.population_mappability_priors.a}
|
|
205 #end if
|
|
206
|
|
207 ## GENOTYPE LIKELIHOODS
|
|
208 #if str( $options_type.genotype_likelihoods.genotype_likelihoods_selector ) == "set":
|
|
209 ${$options_type.genotype_likelihoods.experimental_gls}
|
|
210
|
|
211 --base-quality-cap ${$options_type.genotype_likelihoods.base_quality_cap}
|
|
212 --prob-contamination ${$options_type.genotype_likelihoods.prob_contamination}
|
|
213 #end if
|
|
214
|
|
215 ## ALGORITHMIC FEATURES
|
|
216 #if str( $options_type.algorithmic_features.algorithmic_features_selector ) == "set":
|
|
217 -B '${options_type.algorithmic_features.B}'
|
|
218 -W '${options_type.algorithmic_features.W}'
|
|
219 -D '${options_type.algorithmic_features.D}'
|
|
220
|
|
221 #if str($options_type.algorithmic_features.genotype_variant_threshold)
|
|
222 -S ${options_type.algorithmic_features.genotype_variant_threshold}
|
|
223 #end if
|
|
224
|
|
225 ${options_type.algorithmic_features.N}
|
|
226 ${options_type.algorithmic_features.j}
|
|
227 ${options_type.algorithmic_features.H}
|
|
228 ${options_type.algorithmic_features.genotype_qualities}
|
|
229 ${options_type.algorithmic_features.report_genotype_likelihood_max}
|
|
230
|
|
231 --genotyping-max-banddepth ${options_type.algorithmic_features.genotyping_max_banddepth}
|
|
232 #end if
|
|
233 #end if
|
|
234
|
|
235 ";
|
|
236 done > freebayes_commands.sh &&
|
|
237
|
|
238 cat freebayes_commands.sh |
|
|
239 parallel --will-cite -j \${GALAXY_SLOTS:-1} &&
|
|
240
|
|
241 ## make VCF header from one part file; \$i still holds the last region assigned by the for loop above
|
|
242 grep "^#" "./vcf_output/part_\$i.vcf" > header.txt &&
|
|
243
|
|
244 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
|
|
245 do
|
|
246 ## if this fails then it bails out the script
|
|
247 cat "./vcf_output/part_\$i.vcf" | grep -v "^#" || true
|
|
248 ;
|
|
249 done | sort -k1,1 -k2,2n -k5,5 -u | cat header.txt - > '${output_vcf}'
|
|
250
|
|
251 #if str( $options_type.options_type_selector ) == "full":
|
|
252 #if str( $options_type.optional_inputs.optional_inputs_selector ) == 'set':
|
|
253 #if $options_type.optional_inputs.output_failed_alleles_option:
|
|
254 && ## concatenate the per-region failed-allele files, walking the same regions_uniq.bed list that named them
|
|
255 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
|
|
256 do
|
|
257 cat "./failed_alleles/part_\$i.bed"
|
|
258 ;
|
|
259 done > '${output_failed_alleles_bed}'
|
|
260 #end if
|
|
261
|
|
262 #if $options_type.optional_inputs.output_trace_option:
|
|
263 && ## same for the per-region trace files; the path is double-quoted so the shell expands the loop variable
|
|
264 for i in `cat regions_uniq.bed | awk '{print $1":"$2".."$3}'`;
|
|
265 do
|
|
266 cat "./trace/part_\$i.txt"
|
|
267 ;
|
|
268 done > '${output_trace}'
|
|
269 #end if
|
|
270 #end if
|
|
271 #end if
|
|
272 ]]></command>
|
|
273
|
|
274 <inputs>
|
|
275 <conditional name="reference_source">
|
|
276 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
|
|
277 <option value="cached">Locally cached</option>
|
|
278 <option value="history">History</option>
|
|
279 </param>
|
|
280 <when value="cached">
|
|
281 <expand macro="input_bam">
|
|
282 <expand macro="validation" />
|
|
283 </expand>
|
|
284 <param name="ref_file" type="select" label="Using reference genome">
|
|
285 <options from_data_table="fasta_indexes" />
|
|
286 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input dataset"/>
|
|
287 </param>
|
|
288 </when>
|
|
289 <when value="history"> <!-- FIX ME!!!! -->
|
|
290 <expand macro="input_bam" />
|
|
291 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence"
|
|
292 help="You can upload a FASTA sequence to the history and use it as reference" />
|
|
293 </when>
|
|
294 </conditional>
|
|
295 <conditional name="target_limit_type">
|
|
296 <param name="target_limit_type_selector" type="select" label="Limit variant calling to a set of regions?" help="Sets --targets or --region options">
|
|
297 <option value="do_not_limit" selected="true">Do not limit</option>
|
|
298 <option value="limit_by_target_file">Limit by target file</option>
|
|
299 <option value="limit_by_region">Limit to region</option>
|
|
300 </param>
|
|
301 <when value="do_not_limit" />
|
|
302 <when value="limit_by_target_file">
|
|
303 <param name="input_target_bed" argument="--targets" type="data" format="bed" label="Limit analysis to regions in this BED dataset" />
|
|
304 </when>
|
|
305 <when value="limit_by_region">
|
|
306 <param name="region_chromosome" argument="--region" type="text" label="Region Chromosome" value="" /> <!--only once? -->
|
|
307 <param name="region_start" type="integer" label="Region Start" value="" />
|
|
308 <param name="region_end" type="integer" label="Region End" value="" />
|
|
309 </when>
|
|
310 </conditional>
|
|
311 <conditional name="coverage_options">
|
|
312 <param name="coverage_options_selector" type="select" label="Read coverage"
|
|
313 help="Sets --min-coverage, --limit-coverage, and --skip-coverage">
|
|
314 <option value="do_not_set" selected="true">Use defaults</option>
|
|
315 <option value="set">Specify coverage options</option>
|
|
316 </param>
|
|
317 <when value="set">
|
|
318 <expand macro="par_min_cov" />
|
|
319 </when>
|
|
320 <when value="do_not_set" />
|
|
321 </conditional>
|
|
322 <conditional name="options_type">
|
|
323 <param name="options_type_selector" type="select" label="Choose parameter selection level"
|
|
324 help="Select how much control over the freebayes run you need">
|
|
325 <option value="simple" selected="true">1. Simple diploid calling</option>
|
|
326 <option value="simple_w_filters">2. Simple diploid calling with filtering and coverage</option>
|
|
327 <option value="naive">3. Frequency-based pooled calling</option>
|
|
328 <option value="naive_w_filters">4. Frequency-based pooled calling with filtering and coverage</option>
|
|
329 <option value="full">5. Full list of options</option>
|
|
330 </param>
|
|
331 <when value="full">
|
|
332
|
|
333 <conditional name="optional_inputs">
|
|
334 <param name="optional_inputs_selector" type="select" label="Additional inputs"
|
|
335 help="Sets --samples, --populations, --cnv-map, --trace, --failed-alleles, --variant-input, --only-use-input-alleles, --haplotype-basis-alleles, --report-all-haplotype-alleles, --report-monomorphic options, --observation-bias, and --contamination-estimates">
|
|
336 <option value="do_not_set" selected="true">Do not provide additional inputs</option>
|
|
337 <option value="set">Provide additional inputs</option>
|
|
338 </param>
|
|
339 <when value="set">
|
|
340 <param name="output_failed_alleles_option" argument="--failed-alleles" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="false"
|
|
341 label="Write out failed alleles file" />
|
|
342 <param name="output_trace_option" argument="--trace" type="boolean" truevalue="--trace" falsevalue="" checked="false"
|
|
343 label="Write out algorithm trace file" />
|
|
344 <param argument="--samples" type="data" format="txt"
|
|
345 label="Limit analysis to samples listed (one per line) in this dataset" optional="true"
|
|
346 help="By default FreeBayes will analyze all samples in its input BAM datasets" />
|
|
347 <param argument="--populations" type="data" format="txt" optional="true"
|
|
348 label="Populations dataset"
|
|
349 help="Each line of this dataset should list a sample and a population which it is part of. The population-based bayesian inference model will then be partitioned on the basis of the populations" />
|
|
350 <param name="A" argument="--cnv-map" type="data" format="bed" optional="true"
|
|
351 label="Read a copy number map from a BED dataset"
|
|
352 help="The BED dataset should have the format: 'reference sequence, start, end, sample name, copy number' for each region in each sample which does not have the default copy number as set by --ploidy. If not specified, copy number is set to as specified by --ploidy" />
|
|
353 <conditional name="input_variant_type">
|
|
354 <param name="input_variant_type_selector" type="select" label="Provide variants dataset">
|
|
355 <option value="do_not_provide" selected="true">Do not provide</option>
|
|
356 <option value="provide_vcf">Provide VCF dataset</option>
|
|
357 </param>
|
|
358 <when value="do_not_provide" />
|
|
359 <when value="provide_vcf">
|
|
360 <param name="input_variant_vcf" argument="--variant-input" type="data" format="vcf_bgzip"
|
|
361 label="Use variants reported in this VCF dataset as input to the algorithm">
|
|
362 <conversion name="Tabixized_input" type="tabix" />
|
|
363 </param>
|
|
364 <param name="only_use_input_alleles" argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false"
|
|
365 label="Only provide variant calls and genotype likelihoods for sites in VCF" />
|
|
366 </when>
|
|
367 </conditional>
|
|
368 <param name="haplotype_basis_alleles" argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true"
|
|
369 label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" />
|
|
370 <param name="report_monomorphic" argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false"
|
|
371 label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes" />
|
|
372 <param name="observation_bias" argument="--observation-bias" type="data" format="tabular" optional="true"
|
|
373 label="Load read length-dependent allele observation biases from"
|
|
374 help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" />
|
|
375 <param name="contamination_estimates" argument="--contamination-estimates" type="data" format="tabular" optional="true"
|
|
376 label="Upload per-sample estimates of contamination from"
|
|
377 help="The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates" />
|
|
378 </when>
|
|
379 <when value="do_not_set" />
|
|
380 </conditional>
|
|
381
|
|
382 <!-- reporting -->
|
|
383 <conditional name="reporting">
|
|
384 <param name="reporting_selector" type="select" label="Reporting options" help="Sets -P --pvar option">
|
|
385 <option value="do_not_set" selected="true">Use defaults</option>
|
|
386 <option value="set">Set reporting options</option>
|
|
387 </param>
|
|
388 <when value="set">
|
|
389 <param argument="--pvar" type="float" value="0.0"
|
|
390 label="Report sites if the probability that there is a polymorphism at the site is greater than"
|
|
391 help="Note that post-filtering is generally recommended over the use of this parameter" />
|
|
392 </when>
|
|
393 <when value="do_not_set" />
|
|
394 </conditional>
|
|
395
|
|
396 <!-- population model -->
|
|
397 <conditional name="population_model">
|
|
398 <param name="population_model_selector" type="select" label="Population model options"
|
|
399 help="Sets --theta, --ploidy, --pooled-discrete, and --pooled-continuous options">
|
|
400 <option value="do_not_set" selected="true">Use defaults</option>
|
|
401 <option value="set">Set population model options</option>
|
|
402 </param>
|
|
403 <when value="set">
|
|
404 <param name="T" argument="--theta" type="float" value="0.001"
|
|
405 label="The expected mutation rate or pairwise nucleotide diversity among the population under analysis"
|
|
406 help="This serves as the single parameter to the Ewens Sampling Formula prior model" />
|
|
407 <param name="P" argument="--ploidy" type="integer" value="2"
|
|
408 label="Set ploidy for the analysis" />
|
|
409 <param name="J" argument="--pooled-discrete" type="boolean" truevalue="-J" falsevalue="" checked="false"
|
|
410 label="Assume that samples result from pooled sequencing"
|
|
411 help="Model pooled samples using discrete genotypes across pools. When using this flag, set --ploidy to the number of alleles in each sample or use the --cnv-map to define per-sample ploidy" />
|
|
412 <param name="K" argument="--pooled-continuous" type="boolean" truevalue="-K" falsevalue="" checked="false"
|
|
413 label="Output all alleles which pass input filters, regardless of genotyping outcome or model" />
|
|
414 </when>
|
|
415 <when value="do_not_set" />
|
|
416 </conditional>
|
|
417
|
|
418 <!-- reference allele -->
|
|
419 <conditional name="reference_allele">
|
|
420 <param name="reference_allele_selector" type="select" label="Reference allele options"
|
|
421 help="Sets --use-reference-allele and --reference-quality options">
|
|
422 <option value="do_not_set" selected="true">Use defaults</option>
|
|
423 <option value="set">Set reference allele options</option>
|
|
424 </param>
|
|
425 <when value="set">
|
|
426 <param name="Z" argument="--use-reference-allele" type="boolean" truevalue="-Z" falsevalue="" checked="false"
|
|
427 label="Include the reference allele in the analysis as if it is another sample from the same population" />
|
|
428 <param name="reference_quality" argument="--reference-quality" type="text" value="100,60"
|
|
429 label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" />
|
|
430 </when>
|
|
431 <when value="do_not_set" />
|
|
432 </conditional>
|
|
433
|
|
434 <!-- allelic scope -->
|
|
435 <conditional name="allele_scope">
|
|
436 <param name="allele_scope_selector" type="select" label="Allelic scope options"
|
|
437 help="Sets -I, -i, -X, -u, -n, --haplotype-length, --min-repeat-size, --min-repeat-entropy, and --no-partial-observations options">
|
|
438 <option value="do_not_set" selected="true">Use defaults</option>
|
|
439 <option value="set">Set allelic scope options</option>
|
|
440 </param>
|
|
441 <when value="set">
|
|
442 <param name="I" argument="--no-snps" type="boolean" truevalue="-I" falsevalue="" checked="false"
|
|
443 label="Ignore SNP alleles" />
|
|
444 <param name="i" argument="--no-indels" type="boolean" truevalue="-i" falsevalue="" checked="false"
|
|
445 label="Ignore indels alleles" />
|
|
446 <param name="X" argument="--no-mnps" type="boolean" truevalue="-X" falsevalue="" checked="false"
|
|
447 label="Ignore multi-nucleotide polymorphisms, MNPs" />
|
|
448 <param name="u" argument="--no-complex" type="boolean" truevalue="-u" falsevalue="" checked="false"
|
|
449 label="Ignore complex events (composites of other classes)" />
|
|
450 <param name="n" argument="--use-best-n-alleles" type="integer" value="0"
|
|
451 label="How many best SNP alleles to evaluate"
|
|
452 help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" />
|
|
453 <param name="haplotype_length" argument="--haplotype-length" type="integer" value="3"
|
|
454 label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" />
|
|
455 <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5"
|
|
456 label="When assembling observations across repeats, require the total repeat length at least this many bp" />
|
|
457 <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1"
|
|
458 label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" />
|
|
459 <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false"
|
|
460 label="Exclude observations which do not fully span the dynamically-determined detection window"
|
|
461 help="By default, FreeBayes uses all observations, dividing partial support across matching haplotypes when generating haplotypes" />
|
|
462 </when>
|
|
463 <when value="do_not_set" />
|
|
464 </conditional>
|
|
465
|
|
466 <!-- indel realignment -->
|
|
467 <param name="O" argument="--dont-left-align-indels" type="boolean" truevalue="-O" falsevalue="" checked="false"
|
|
468 label="Turn off left-alignment of indels" />
|
|
469
|
|
470 <!-- input filters -->
|
|
471 <conditional name="input_filters">
|
|
472 <param name="input_filters_selector" type="select" label="Input filters"
|
|
473 help="Sets -4, -m, -q, -R, -Y, -Q, -U, -z, -$, -e, -0, -F, -C, -3, -G, and -! options">
|
|
474 <option value="do_not_set" selected="true">No input filters (default)</option>
|
|
475 <option value="set">Set input filters</option>
|
|
476 </param>
|
|
477 <when value="set">
|
|
478 <param name="use_duplicate_reads" argument="--use-duplicate-reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="false"
|
|
479 label="Include duplicate-marked alignments in the analysis" />
|
|
480 <param name="m" argument="--min-mapping-quality" type="integer" value="1"
|
|
481 label="Exclude alignments from analysis if they have a mapping quality less than" />
|
|
482 <param name="q" argument="--min-base-quality" type="integer" value="0"
|
|
483 label="Exclude alleles from analysis if their supporting base quality is less than" />
|
|
484 <param name="R" argument="--min-supporting-allele-qsum" type="integer" value="0"
|
|
485 label="Consider any allele in which the sum of qualities of supporting observations is at least" />
|
|
486 <param name="Y" argument="--min-supporting-mapping-qsum" type="integer" value="0"
|
|
487 label="Consider any allele in which the sum of mapping qualities of supporting reads is at least" />
|
|
488 <conditional name="mismatch_filters">
|
|
489 <param name="mismatch_filters_selector" type="select" label="Mismatch filters"
|
|
490 help="Sets -Q, -U, -z, and -$ options">
|
|
491 <option value="do_not_set" selected="true">No mismatch filters (default)</option>
|
|
492 <option value="set">Set mismatch filters</option>
|
|
493 </param>
|
|
494 <when value="set">
|
|
495 <param name="Q" argument="--mismatch-base-quality-threshold" type="integer" value="10"
|
|
496 label="Count mismatches toward -U (option below) if the base quality of the mismatch is >=" />
|
|
497 <param name="U" type="integer" argument="--read-mismatch-limit" value="1000" optional="true"
|
|
498 label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (option above)"
|
|
499 help="default=~unbounded" />
|
|
500 <param name="z" argument="--read-max-mismatch-fraction" type="float" value="1.0" min="0.0" max="1.0"
|
|
501 label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (second option above)" />
|
|
502 <param name="read_snp_limit" argument="--read-snp-limit" type="integer" value="1000"
|
|
503 label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold (third option above)"
|
|
504 help="default=~unbounded" />
|
|
505 </when>
|
|
506 <when value="do_not_set" />
|
|
507 </conditional>
|
|
508 <param name="e" argument="--read-indel-limit" type="integer" value="1000"
|
|
509 label="Exclude reads with more than this number of separate gaps"
|
|
510 help="default=~unbounded" />
|
|
511 <param name="standard_filters" argument="--standard-filters" type="boolean" truevalue="-0" falsevalue="" checked="false"
|
|
512 label="Use stringent input base and mapping quality filters"
|
|
513 help="Equivalent to -m 30 -q 20 -R 0 -S 0" />
|
|
514 <param name="F" argument="--min-alternate-fraction" type="float" value="0.05"
|
|
515 label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" />
|
|
516 <param name="C" argument="--min-alternate-count" type="integer" value="2"
|
|
517 label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" />
|
|
518 <param name="min_alternate_qsum" argument="--min-alternate-qsum" type="integer" value="0"
|
|
519 label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" />
|
|
520 <param name="G" argument="--min-alternate-total" type="integer" value="1"
|
|
521 label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" />
|
|
522 </when>
|
|
523 <when value="do_not_set" />
|
|
524 </conditional>
|
|
525
|
|
526 <!-- population and mappability priors -->
|
|
527 <conditional name="population_mappability_priors">
|
|
528 <param name="population_mappability_priors_selector" type="select" label="Population and mappability priors"
|
|
529 help="Sets -k, -w, -V, and -a options">
|
|
530 <option value="do_not_set" selected="true">Use defaults</option>
|
|
531 <option value="set">Set population and mappability priors</option>
|
|
532 </param>
|
|
533 <when value="set">
|
|
534 <param name="k" argument="--no-population-priors" type="boolean" truevalue="-k" falsevalue="" checked="false"
|
|
535 label="No population priors"
|
|
536 help="Equivalent to --pooled-discrete --hwe-priors-off and removal of Ewens Sampling Formula component of priors" />
|
|
537 <param name="w" argument="--hwe-priors-off" type="boolean" truevalue="-w" falsevalue="" checked="false"
|
|
538 label="Disable estimation of the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
|
|
539 <param name="V" argument="--binomial-obs-priors-off" type="boolean" truevalue="-V" falsevalue="" checked="false"
|
|
540 label="Disable incorporation of prior expectations about observations"
|
|
541 help="Uses read placement probability, strand balance probability, and read position (5''-3'') probability" />
|
|
542 <param name="a" argument="--allele-balance-priors-off" type="boolean" truevalue="-a" falsevalue="" checked="false"
|
|
543 label="Disable use of aggregate probability of observation balance between alleles as a component of the priors" />
|
|
544 </when>
|
|
545 <when value="do_not_set" />
|
|
546 </conditional>
|
|
547
|
|
548 <!-- genotype likelihoods: selector with two cases; "set" defines the base quality cap, experimental GLs and contamination probability parameters, "do_not_set" defines no parameters -->
|
|
549 <conditional name="genotype_likelihoods">
|
|
550 <param name="genotype_likelihoods_selector" type="select" label="Genotype likelihood options"
|
|
551 help="Sets --base-quality-cap, --experimental-gls, and --prob-contamination options">
|
|
552 <option value="do_not_set" selected="true">Use defaults</option>
|
|
553 <option value="set">Set genotype likelihood options</option>
|
|
554 </param>
|
|
555 <when value="set">
|
|
556 <param name="base_quality_cap" argument="--base-quality-cap" type="integer" value="0"
|
|
557 label="Limit estimated observation quality by capping base quality at" />
|
|
558 <param name="experimental_gls" argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false"
|
|
559 label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual"
|
|
560 help="Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples" />
|
|
561 <param name="prob_contamination" argument="--prob-contamination" type="float" value="10e-9"
|
|
562 label="An estimate of contamination to use for all samples" />
|
|
563 </when>
|
|
564 <when value="do_not_set" />
|
|
565 </conditional>
|
|
566
|
|
567 <!-- algorithmic features: selector with two cases; "set" defines the genotyping and likelihood tuning parameters below, "do_not_set" defines no parameters -->
|
|
568 <conditional name="algorithmic_features">
|
|
569 <param name="algorithmic_features_selector" type="select" label="Algorithmic features"
|
|
570 help="Sets --report-genotype-likelihood-max, -B, --genotyping-max-banddepth, -W, -N, -S, -j, -H, -D, -= options">
|
|
571 <option value="do_not_set" selected="true">Use defaults</option>
|
|
572 <option value="set">Set algorithmic features</option>
|
|
573 </param>
|
|
574 <when value="set">
|
|
575 <param name="report_genotype_likelihood_max" argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false"
|
|
576 label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods" />
|
|
577 <param name="B" argument="--genotyping-max-iterations" type="integer" value="1000"
|
|
578 label="Iterate no more than N times during genotyping step" />
|
|
579 <param name="genotyping_max_banddepth" argument="--genotyping-max-banddepth" type="integer" value="6"
|
|
580 label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" />
|
|
581 <param name="W" argument="--posterior-integration-limits" type="text" value="1,3"
|
|
582 label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" />
|
|
583 <param name="N" argument="--exclude-unobserved-genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="false"
|
|
584 label="Skip sample genotypings for which the sample has no supporting reads" />
|
|
585 <param name="genotype_variant_threshold" argument="--genotype-variant-threshold" type="integer" value="" optional="true"
|
|
586 label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample"
|
|
587 help="default=~unbounded" />
|
|
588 <param name="j" argument="--use-mapping-quality" type="boolean" truevalue="-j" falsevalue="" checked="false"
|
|
589 label="Use mapping quality of alleles when calculating data likelihoods" />
|
|
590 <param name="H" argument="--harmonic-indel-quality" type="boolean" truevalue="-H" falsevalue="" checked="false"
|
|
591 label="Use a weighted sum of base qualities around an indel, scaled by the distance from the indel"
|
|
592 help="By default, FreeBayes uses a minimum Base Quality in flanking sequence" />
|
|
593 <param name="D" argument="--read-dependence-factor" type="float" value="0.9"
|
|
594 label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" />
|
|
595 <param name="genotype_qualities" argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false"
|
|
596 label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" />
|
|
597 </when>
|
|
598 <when value="do_not_set" />
|
|
599 </conditional>
|
|
600 </when>
|
|
601 <when value="simple" />
|
|
602 <when value="simple_w_filters" />
|
|
603 <when value="naive" />
|
|
604 <when value="naive_w_filters" />
|
|
605 </conditional>
|
|
606 </inputs>
|
|
607 <!-- Datasets declared by this tool: one VCF without a filter, plus two datasets whose filters test the optional inputs section --> <outputs>
|
|
608 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (variants)" />
|
|
609 <!-- filter requires option level cline or full, optional inputs set, and output_failed_alleles_option true --> <data name="output_failed_alleles_bed" format="bed" label="${tool.name} on ${on_string} (failed alleles)">
|
|
610 <filter>options_type['options_type_selector'] in ('cline', 'full') and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_failed_alleles_option'] is True</filter>
|
|
611 </data>
|
|
612 <!-- filter requires option level cline or full, optional inputs set, and output_trace_option true --> <data name="output_trace" format="txt" label="${tool.name} on ${on_string} (trace)">
|
|
613 <filter>options_type['options_type_selector'] in ('cline', 'full') and options_type['optional_inputs']['optional_inputs_selector'] == 'set' and options_type['optional_inputs']['output_trace_option'] is True</filter>
|
|
614 </data>
|
|
615 </outputs>
|
|
616 <!-- Functional tests: each runs on the phiX174 FASTA and BAM fixtures and compares output_vcf to an expected file, allowing 4 differing lines --> <tests>
|
|
617 <!-- Test 1: reference from history, individual process mode, "simple" option level --> <test>
|
|
618 <param name="reference_source_selector" value="history" />
|
|
619 <param name="processmode" value="individual" />
|
|
620 <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
|
|
621 <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
|
|
622 <param name="options_type_selector" value="simple"/>
|
|
623 <output name="output_vcf" file="freebayes-phix174-test1.vcf" lines_diff="4" />
|
|
624 </test>
|
|
625 <!-- Test 2: "naive_w_filters" option level with min_coverage set to 14 --> <test>
|
|
626 <param name="reference_source_selector" value="history" />
|
|
627 <param name="processmode" value="individual" />
|
|
628 <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
|
|
629 <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
|
|
630 <param name="options_type_selector" value="naive_w_filters"/>
|
|
631 <param name="min_coverage" value="14"/>
|
|
632 <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" />
|
|
633 </test>
|
|
634 <!-- Test 3: NOTE(review): every input parameter is identical to the preceding test; only the expected file (test3.vcf) differs. Confirm whether a different option level or setting was intended here --> <test>
|
|
635 <param name="reference_source_selector" value="history" />
|
|
636 <param name="processmode" value="individual" />
|
|
637 <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
|
|
638 <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
|
|
639 <param name="options_type_selector" value="naive_w_filters"/>
|
|
640 <param name="min_coverage" value="14"/>
|
|
641 <output name="output_vcf" file="freebayes-phix174-test3.vcf" lines_diff="4" />
|
|
642 </test>
|
|
643 <!-- Test 4: "full" option level with population_model_selector set and P set to 1 --> <test>
|
|
644 <param name="reference_source_selector" value="history" />
|
|
645 <param name="processmode" value="individual" />
|
|
646 <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
|
|
647 <param name="input_bams" ftype="bam" value="freebayes-phix174.bam"/>
|
|
648 <param name="options_type_selector" value="full"/>
|
|
649 <param name="population_model_selector" value="set"/>
|
|
650 <param name="P" value="1"/>
|
|
651 <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" />
|
|
652 </test>
|
|
653 </tests>
|
|
654 <help><![CDATA[
|
|
655 **What it does**
|
|
656
|
|
657 FreeBayes is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and complex events (composite insertion and substitution events) smaller than the length of a short-read sequencing alignment.
|
|
658
|
|
659 See https://github.com/ekg/freebayes for details on FreeBayes.
|
|
660
|
|
661 ------
|
|
662
|
|
663 **Description**
|
|
664
|
|
665 Provided some BAM dataset(s) and a reference sequence, FreeBayes will produce a VCF dataset describing SNPs, indels, and complex variants in samples in the input alignments.
|
|
666
|
|
667 By default, FreeBayes will consider variants supported by at least 2 observations in a single sample (-C) and also by at least 20% of the reads from a single sample (-F). These settings are suitable for low to high depth sequencing in haploid and diploid samples, but users working with polyploid or pooled samples may wish to adjust them depending on the characteristics of their sequencing data.
|
|
668
|
|
669 FreeBayes is capable of calling variant haplotypes shorter than a read length where multiple polymorphisms segregate on the same read. The maximum distance between polymorphisms phased in this way is determined by the --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be set to half the read length.
|
|
670
|
|
671 Ploidy may be set to any level (-p), but by default all samples are assumed to be diploid. FreeBayes can model per-sample and per-region variation in copy-number (-A) using a copy-number variation map.
|
|
672
|
|
673 FreeBayes can act as a frequency-based pooled caller and describe variants and haplotypes in terms of observation frequency rather than called genotypes. To do so, use --pooled-continuous and set input filters to a suitable level. Allele observation counts will be described by AO and RO fields in the VCF output.
|
|
674
|
|
675 -------
|
|
676
|
|
677 **Galaxy-specific options**
|
|
678
|
|
679 Galaxy allows five levels of control over FreeBayes options, provided by the **Choose parameter selection level** menu option. These are:
|
|
680
|
|
681 1. *Simple diploid calling*: The simplest possible FreeBayes application. Equivalent to using FreeBayes with only a BAM input and no other parameter options.
|
|
682 2. *Simple diploid calling with filtering and coverage*: Same as #1 plus two additional options: -0 (standard filters: --min-mapping-quality 30 --min-base-quality 20 --min-supporting-allele-qsum 0 --genotype-variant-threshold 0) and --min-coverage.
|
|
683 3. *Frequency-based pooled calling*: This is equivalent to using FreeBayes with the following options: --haplotype-length 0 --min-alternate-count 1 --min-alternate-fraction 0 --pooled-continuous --report-monomorphic. This is the best choice for calling variants in mixtures such as viral, bacterial, or organellar genomes.
|
|
684 4. *Frequency-based pooled calling with filtering and coverage*: Same as #3 but adds -0 and --min-coverage like in #2.
|
|
685 5. *Complete list of all options*: Gives you full control by exposing all FreeBayes options as Galaxy parameters.
|
|
686
|
|
687 ------
|
|
688
|
|
689 **Command-line parameters**
|
|
690
|
|
691 **Input**::
|
|
692
|
|
693 --bam FILE The file or set of BAM files to be analyzed.
|
|
694 --bam-list FILE A file containing a list of BAM files to be analyzed.
|
|
695
|
|
696 --stdin Read BAM input on stdin.
|
|
697 --fasta-reference FILE Use FILE as the reference sequence for analysis.
|
|
698 An index file (FILE.fai) will be created if none exists.
|
|
699 If neither --targets nor --region are specified, FreeBayes
|
|
700 will analyze every position in this reference.
|
|
701 --targets FILE Limit analysis to targets listed in the BED-format FILE.
|
|
702 --region <chrom>:<start>-<end> Limit analysis to the specified region, 0-base coordinates,
|
|
703 end_position not included (same as BED format).
|
|
704 Either '-' or '..' may be used as a separator.
|
|
705 --samples FILE Limit analysis to samples listed (one per line) in the FILE.
|
|
706 By default FreeBayes will analyze all samples in its input
|
|
707 BAM files.
|
|
708 --populations FILE Each line of FILE should list a sample and a population which
|
|
709 it is part of. The population-based bayesian inference model
|
|
710 will then be partitioned on the basis of the populations.
|
|
711 --cnv-map FILE Read a copy number map from the BED file FILE, which has
|
|
712 either a sample-level ploidy:
|
|
713 sample_name copy_number
|
|
714 or a region-specific format:
|
|
715 seq_name start end sample_name copy_number
|
|
716 ... for each region in each sample which does not have the
|
|
717 default copy number as set by --ploidy. These fields can be delimited
|
|
718 by space or tab.
|
|
719
|
|
720 **Output**::
|
|
721
|
|
722 --vcf FILE Output VCF-format results to FILE. (default: stdout)
|
|
723 --gvcf Write gVCF output, which indicates coverage in uncalled regions.
|
|
724 --gvcf-chunk NUM When writing gVCF output emit a record for every NUM bases.
|
|
725 --gvcf-dont-use-chunk When writing the gVCF output emit a record for all bases if
|
|
726 set to "true" , will also route an int to --gvcf-chunk
|
|
727 similar to --output-mode EMIT_ALL_SITES from GATK
|
|
728 --variant-input VCF Use variants reported in VCF file as input to the algorithm.
|
|
729 Variants in this file will be included in the output even if
|
|
730 there is not enough support in the data to pass input filters.
|
|
731 --only-use-input-alleles Only provide variant calls and genotype likelihoods for sites
|
|
732 and alleles which are provided in the VCF input, and provide
|
|
733 output in the VCF for all input alleles, not just those which
|
|
734 have support in the data.
|
|
735 --haplotype-basis-alleles VCF When specified, only variant alleles provided in this input
|
|
736 VCF will be used for the construction of complex or haplotype
|
|
737 alleles.
|
|
738 --report-all-haplotype-alleles At sites where genotypes are made over haplotype alleles,
|
|
739 provide information about all alleles in output, not only
|
|
740 those which are called.
|
|
741 --report-monomorphic Report even loci which appear to be monomorphic, and report all
|
|
742 considered alleles, even those which are not in called genotypes.
|
|
743 Loci which do not have any potential alternates have '.' for ALT.
|
|
744 --pvar N Report sites if the probability that there is a polymorphism
|
|
745 at the site is greater than N. default: 0.0. Note that post-
|
|
746 filtering is generally recommended over the use of this parameter.
|
|
747 --strict-vcf Generate strict VCF format (FORMAT/GQ will be an int)
|
|
748
|
|
749 **Population model**::
|
|
750
|
|
751 --theta N The expected mutation rate or pairwise nucleotide diversity
|
|
752 among the population under analysis. This serves as the
|
|
753 single parameter to the Ewens Sampling Formula prior model
|
|
754 default: 0.001
|
|
755 --ploidy N Sets the default ploidy for the analysis to N. default: 2
|
|
756 --pooled-discrete Assume that samples result from pooled sequencing.
|
|
757 Model pooled samples using discrete genotypes across pools.
|
|
758 When using this flag, set --ploidy to the number of
|
|
759 alleles in each sample or use the --cnv-map to define
|
|
760 per-sample ploidy.
|
|
761 --pooled-continuous Output all alleles which pass input filters, regardless of
|
|
762 genotyping outcome or model.
|
|
763
|
|
764 **Reference allele**::
|
|
765
|
|
766 --use-reference-allele This flag includes the reference allele in the analysis as
|
|
767 if it is another sample from the same population.
|
|
768 --reference-quality MQ,BQ Assign mapping quality of MQ to the reference allele at each
|
|
769 site and base quality of BQ. default: 100,60
|
|
770
|
|
771 **Allele scope**::
|
|
772
|
|
773 --use-best-n-alleles N Evaluate only the best N SNP alleles, ranked by sum of
|
|
774 supporting quality scores. (Set to 0 to use all; default: all)
|
|
775 --max-complex-gap
|
|
776 --haplotype-length N Allow haplotype calls with contiguous embedded matches of up
|
|
777 to this length. Set N=-1 to disable clumping. (default: 3)
|
|
778 --min-repeat-size When assembling observations across repeats, require the total repeat
|
|
779 length at least this many bp. (default: 5)
|
|
780 --min-repeat-entropy N To detect interrupted repeats, build across sequence until it has
|
|
781 entropy > N bits per bp. Set to 0 to turn off. (default: 1)
|
|
782 --no-partial-observations Exclude observations which do not fully span the dynamically-determined
|
|
783 detection window. (default, use all observations, dividing partial
|
|
784 support across matching haplotypes when generating haplotypes.)
|
|
785
|
|
786 **Indel realignment**::
|
|
787
|
|
788 --dont-left-align-indels Turn off left-alignment of indels, which is enabled by default.
|
|
789
|
|
790 **Input filters**::
|
|
791
|
|
792 --use-duplicate-reads Include duplicate-marked alignments in the analysis.
|
|
793 default: exclude duplicates marked as such in alignments
|
|
794 --min-mapping-quality Q Exclude alignments from analysis if they have a mapping
|
|
795 quality less than Q. default: 1
|
|
796 --min-base-quality Q Exclude alleles from analysis if their supporting base
|
|
797 quality is less than Q. default: 0
|
|
798 --min-supporting-allele-qsum Q Consider any allele in which the sum of qualities of supporting
|
|
799 observations is at least Q. default: 0
|
|
800 --min-supporting-mapping-qsum Q Consider any allele in which the sum of mapping qualities of
|
|
801 supporting reads is at least Q. default: 0
|
|
802 --mismatch-base-quality-threshold Q Count mismatches toward --read-mismatch-limit if the base
|
|
803 quality of the mismatch is >= Q. default: 10
|
|
804 --read-mismatch-limit N Exclude reads with more than N mismatches where each mismatch
|
|
805 has base quality >= mismatch-base-quality-threshold.
|
|
806 default: ~unbounded
|
|
807 --read-max-mismatch-fraction N Exclude reads with more than N [0,1] fraction of mismatches where
|
|
808 each mismatch has base quality >= mismatch-base-quality-threshold
|
|
809 default: 1.0
|
|
810 --read-snp-limit N Exclude reads with more than N base mismatches, ignoring gaps
|
|
811 with quality >= mismatch-base-quality-threshold.
|
|
812 default: ~unbounded
|
|
813 --read-indel-limit N Exclude reads with more than N separate gaps.
|
|
814 default: ~unbounded
|
|
815 --standard-filters Use stringent input base and mapping quality filters
|
|
816 Equivalent to -m 30 -q 20 -R 0 -S 0
|
|
817 --min-alternate-fraction N Require at least this fraction of observations supporting
|
|
818 an alternate allele within a single individual
|
|
819 in order to evaluate the position. default: 0.05
|
|
820 --min-alternate-count N Require at least this count of observations supporting
|
|
821 an alternate allele within a single individual in order
|
|
822 to evaluate the position. default: 2
|
|
823 --min-alternate-qsum N Require at least this sum of quality of observations supporting
|
|
824 an alternate allele within a single individual in order
|
|
825 to evaluate the position. default: 0
|
|
826 --min-alternate-total N Require at least this count of observations supporting
|
|
827 an alternate allele within the total population in order
|
|
828 to use the allele in analysis. default: 1
|
|
829 --min-coverage N Require at least this coverage to process a site. default: 0
|
|
830 --limit-coverage N Downsample per-sample coverage to this level if greater than this coverage.
|
|
831 default: no limit
|
|
832 --skip-coverage N Skip processing of alignments overlapping positions with coverage >N.
|
|
833 This filters sites above this coverage, but will also reduce data nearby.
|
|
834 default: no limit
|
|
835
|
|
836 **Population priors**::
|
|
837
|
|
838 --no-population-priors Equivalent to --pooled-discrete --hwe-priors-off and removal of
|
|
839 Ewens Sampling Formula component of priors.
|
|
840
|
|
841 **Mappability priors**::
|
|
842
|
|
843 --hwe-priors-off Disable estimation of the probability of the combination
|
|
844 arising under HWE given the allele frequency as estimated
|
|
845 by observation frequency.
|
|
846 --binomial-obs-priors-off Disable incorporation of prior expectations about observations.
|
|
847 Uses read placement probability, strand balance probability,
|
|
848 and read position (5'-3') probability.
|
|
849 --allele-balance-priors-off Disable use of aggregate probability of observation balance between alleles
|
|
850 as a component of the priors.
|
|
851
|
|
852 **Genotype likelihoods**::
|
|
853
|
|
854 --observation-bias FILE Read length-dependent allele observation biases from FILE.
|
|
855 The format is [length] [alignment efficiency relative to reference]
|
|
856 where the efficiency is 1 if there is no relative observation bias.
|
|
857 --base-quality-cap Q Limit estimated observation quality by capping base quality at Q.
|
|
858 --prob-contamination F An estimate of contamination to use for all samples. default: 10e-9
|
|
859 --legacy-gls Use legacy (polybayes equivalent) genotype likelihood calculations
|
|
860 --contamination-estimates FILE A file containing per-sample estimates of contamination, such as
|
|
861 those generated by VerifyBamID. The format should be:
|
|
862 sample p(read=R|genotype=AR) p(read=A|genotype=AA)
|
|
863 Sample '*' can be used to set default contamination estimates.
|
|
864
|
|
865 **Algorithmic features**::
|
|
866
|
|
867 --report-genotype-likelihood-max Report genotypes using the maximum-likelihood estimate provided
|
|
868 from genotype likelihoods.
|
|
869 --genotyping-max-iterations N Iterate no more than N times during genotyping step. default: 1000.
|
|
870 --genotyping-max-banddepth N Integrate no deeper than the Nth best genotype by likelihood when
|
|
871 genotyping. default: 6.
|
|
872 --posterior-integration-limits N,M Integrate all genotype combinations in our posterior space
|
|
873 which include no more than N samples with their Mth best
|
|
874 data likelihood. default: 1,3.
|
|
875 --exclude-unobserved-genotypes Skip sample genotypings for which the sample has no supporting reads.
|
|
876 --genotype-variant-threshold N Limit posterior integration to samples where the second-best
|
|
877 genotype likelihood is no more than log(N) from the highest
|
|
878 genotype likelihood for the sample. default: ~unbounded
|
|
879 --use-mapping-quality Use mapping quality of alleles when calculating data likelihoods.
|
|
880 --harmonic-indel-quality Use a weighted sum of base qualities around an indel, scaled by the
|
|
881 distance from the indel. By default use a minimum BQ in flanking sequence.
|
|
882 --read-dependence-factor N Incorporate non-independence of reads by scaling successive
|
|
883 observations by this factor during data likelihood
|
|
884 calculations. default: 0.9
|
|
885 --genotype-qualities Calculate the marginal probability of genotypes and report as GQ in
|
|
886 each sample field in the VCF output.
|
|
887
|
|
888 ------
|
|
889
|
|
890 **Acknowledgments**
|
|
891
|
|
892 The initial version of the wrapper was produced by Dan Blankenberg and upgraded by Anton Nekrutenko.
|
|
893 TNG was developed by Bjoern Gruening.
|
|
894 ]]>
|
|
895 </help>
|
|
896 <!-- Expands the "citations" macro, passing a BibTeX citation for GNU Parallel as its content --> <expand macro="citations">
|
|
897 <citation type="bibtex">
|
|
898 @article{Tange2011a,
|
|
899 title = {GNU Parallel - The Command-Line Power Tool},
|
|
900 author = {O. Tange},
|
|
901 address = {Frederiksberg, Denmark},
|
|
902 journal = {;login: The USENIX Magazine},
|
|
903 month = {Feb},
|
|
904 number = {1},
|
|
905 volume = {36},
|
|
906 url = {http://www.gnu.org/s/parallel},
|
|
907 year = {2011},
|
|
908 pages = {42-47}
|
|
909 }
|
|
910 </citation>
|
|
911 </expand>
|
|
912 </tool>
|