Mercurial > repos > jorrit > obotest
comparison obo_scripts.xml @ 3:0c42bebab126 draft default tip
Uploaded
| author | jorrit |
|---|---|
| date | Thu, 07 Feb 2013 16:58:40 -0500 |
| parents | e71204d5e03c |
| children |
comparison
equal
deleted
inserted
replaced
| 2:e71204d5e03c | 3:0c42bebab126 |
|---|---|
| 1 <?xml version="1.0"?> | |
| 2 <tool id="fetch_obo_ontology2" name="FetchOboOntology2" version="0.0.8"> | |
| 3 <requirements> <!-- External packages the command template invokes: freebayes (variant caller) and samtools (used for faidx) --> | |
| 4 <requirement type="package" version="0.9.6_9608597d12e127c847ae03aa03440ab63992fedf">freebayes</requirement> | |
| 5 <requirement type="package" version="0.1.18">samtools</requirement> | |
| 6 </requirements> | |
| 7 <description> - obo scripts</description> <!-- NOTE(review): the tool id, name and this description refer to OBO scripts, but the requirements and command run FreeBayes; confirm which is intended --> | |
| 8 <command> | |
| 9 ##set up input files: a history-supplied reference is symlinked to a fixed local name and indexed with samtools faidx; a cached reference is used in place via its data-table path; every BAM and its index are symlinked to localbam_N.bam / localbam_N.bam.bai | |
| 10 #set $reference_fasta_filename = "localref.fa" | |
| 11 #if str( $reference_source.reference_source_selector ) == "history": | |
| 12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && | |
| 13 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 && | |
| 14 #else: | |
| 15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
| 16 #end if | |
| 17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): | |
| 18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" && | |
| 19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && | |
| 20 #end for | |
| 21 ##finished setting up inputs. NOTE(review): if samtools faidx fails, the message is echoed to stderr and the shell chain still continues to freebayes; confirm this best-effort behaviour is intended | |
| 22 | |
| 23 ##start FreeBayes commandline: one bam argument per linked BAM, then the reference | |
| 24 freebayes | |
| 25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ): | |
| 26 --bam "localbam_${bam_count}.bam" | |
| 27 #end for | |
| 28 --fasta-reference "${reference_fasta_filename}" | |
| 29 | |
| 30 ##outputs: the VCF is always written | |
| 31 --vcf "${output_vcf}" | |
| 32 | |
| 33 ##advanced options: everything from here to the final end-if is emitted only when options_type_selector is "advanced" | |
| 34 #if str( $options_type.options_type_selector ) == "advanced": | |
| 35 ##additional outputs: trace and failed-alleles files, each gated by its own checkbox | |
| 36 #if $options_type.output_trace_option: | |
| 37 --trace "${output_trace}" | |
| 38 #end if | |
| 39 #if $options_type.output_failed_alleles_option: | |
| 40 --failed-alleles "${output_failed_alleles_bed}" | |
| 41 #end if | |
| 42 | |
| 43 ##additional inputs: target/region limits plus optional sample, population, CNV-map and variant files | |
| 44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file": | |
| 45 --targets "${options_type.target_limit_type.input_target_bed}" | |
| 46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region": | |
| 47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}" | |
| 48 #end if | |
| 49 #if $options_type.input_sample_file: | |
| 50 --samples "${options_type.input_sample_file}" | |
| 51 #end if | |
| 52 #if $options_type.input_populations_file: | |
| 53 --populations "${options_type.input_populations_file}" | |
| 54 #end if | |
| 55 #if $options_type.input_cnv_map_bed: | |
| 56 --cnv-map "${options_type.input_cnv_map_bed}" | |
| 57 #end if | |
| 58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf": | |
| 59 --variant-input "${options_type.input_variant_type.input_variant_vcf}" | |
| 60 ${options_type.input_variant_type.only_use_input_alleles} | |
| 61 #end if | |
| 62 #if $options_type.haplotype_basis_alleles: | |
| 63 --haplotype-basis-alleles "${options_type.haplotype_basis_alleles}" | |
| 64 #end if | |
| 65 | |
| 66 | |
| 67 ##reporting (boolean parameters expand to their flag text or to an empty string) | |
| 68 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set": | |
| 69 --pvar "${options_type.section_reporting_type.pvar}" | |
| 70 ${options_type.section_reporting_type.show_reference_repeats} | |
| 71 #end if | |
| 72 | |
| 73 ##population model | |
| 74 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set": | |
| 75 --theta "${options_type.section_population_model_type.theta}" | |
| 76 --ploidy "${options_type.section_population_model_type.ploidy}" | |
| 77 ${options_type.section_population_model_type.pooled} | |
| 78 #end if | |
| 79 | |
| 80 ##reference allele: mapping and base quality are passed as one comma-separated pair | |
| 81 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele": | |
| 82 --use-reference-allele | |
| 83 ${options_type.use_reference_allele_type.diploid_reference} | |
| 84 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}" | |
| 85 #end if | |
| 86 | |
| 87 ##allele scope. NOTE(review): optional integers here and below (max_complex_gap, read_*_limit, genotype_variant_threshold) are gated by template truthiness; confirm an explicit 0 is still passed through | |
| 88 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set": | |
| 89 ${options_type.section_allele_scope_type.no_snps} | |
| 90 ${options_type.section_allele_scope_type.no_indels} | |
| 91 ${options_type.section_allele_scope_type.no_mnps} | |
| 92 ${options_type.section_allele_scope_type.no_complex} | |
| 93 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}" | |
| 94 #if $options_type.section_allele_scope_type.max_complex_gap: | |
| 95 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}" | |
| 96 #end if | |
| 97 #end if | |
| 98 | |
| 99 ##indel realignment (not behind a set/do-not-set selector; emitted whenever advanced options are on) | |
| 100 ${options_type.left_align_indels} | |
| 101 | |
| 102 ##input filters: either explicit quality thresholds or the standard-filters flag, followed by mismatch, indel and coverage limits | |
| 103 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set": | |
| 104 ${options_type.section_input_filters_type.use_duplicate_reads} | |
| 105 #if str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "apply_filters": | |
| 106 --min-mapping-quality "${options_type.section_input_filters_type.quality_filter_type.min_mapping_quality}" | |
| 107 --min-base-quality "${options_type.section_input_filters_type.quality_filter_type.min_base_quality}" | |
| 108 --min-supporting-quality "${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.quality_filter_type.min_supporting_quality_bq}" | |
| 109 #elif str( $options_type.section_input_filters_type.quality_filter_type.quality_filter_type_selector ) == "standard_filters": | |
| 110 --standard-filters | |
| 111 #end if | |
| 112 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}" | |
| 113 #if $options_type.section_input_filters_type.read_mismatch_limit: | |
| 114 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}" | |
| 115 #end if | |
| 116 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}" | |
| 117 #if $options_type.section_input_filters_type.read_snp_limit: | |
| 118 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}" | |
| 119 #end if | |
| 120 #if $options_type.section_input_filters_type.read_indel_limit: | |
| 121 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}" | |
| 122 #end if | |
| 123 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}" | |
| 124 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}" | |
| 125 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}" | |
| 126 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}" | |
| 127 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}" | |
| 128 --min-coverage "${options_type.section_input_filters_type.min_coverage}" | |
| 129 #end if | |
| 130 | |
| 131 ##bayesian priors | |
| 132 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set": | |
| 133 ${options_type.section_bayesian_priors_type.no_ewens_priors} | |
| 134 ${options_type.section_bayesian_priors_type.no_population_priors} | |
| 135 ${options_type.section_bayesian_priors_type.hwe_priors} | |
| 136 #end if | |
| 137 | |
| 138 ##observation prior expectations | |
| 139 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set": | |
| 140 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors} | |
| 141 ${options_type.section_observation_prior_expectations_type.allele_balance_priors} | |
| 142 #end if | |
| 143 | |
| 144 ##algorithmic features: the two posterior integration limits are passed as one comma-separated pair | |
| 145 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set": | |
| 146 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}" | |
| 147 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}" | |
| 148 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}" | |
| 149 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}" | |
| 150 ${options_type.section_algorithmic_features_type.no_permute} | |
| 151 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes} | |
| 152 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold: | |
| 153 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}" | |
| 154 #end if | |
| 155 ${options_type.section_algorithmic_features_type.use_mapping_quality} | |
| 156 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}" | |
| 157 ${options_type.section_algorithmic_features_type.no_marginals} | |
| 158 #end if | |
| 159 | |
| 160 #end if | |
| 161 </command> | |
| 162 <inputs> | |
| 163 <conditional name="reference_source"> <!-- Reference source: "cached" selects a genome from the sam_fa_indexes data table, "history" takes a FASTA dataset; both branches collect one or more BAM files in the input_bams repeat --> | |
| 164 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | |
| 165 <option value="cached">Locally cached</option> | |
| 166 <option value="history">History</option> | |
| 167 </param> | |
| 168 <when value="cached"> | |
| 169 <repeat name="input_bams" title="Sample BAM file" min="1"> | |
| 170 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
| 171 <validator type="unspecified_build" /> | |
| 172 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." /> | |
| 173 </param> | |
| 174 </repeat> | |
| 175 <param name="ref_file" type="select" label="Using reference genome"> | |
| 176 <options from_data_table="sam_fa_indexes"> | |
| 177 <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...--> | |
| 178 </options> | |
| 179 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
| 180 </param> | |
| 181 </when> | |
| 182 <when value="history"> <!-- FIX ME!!!! NOTE(review): unlike the cached branch, the BAM inputs here carry no build validators; presumably that is what still needs fixing - confirm --> | |
| 183 <repeat name="input_bams" title="Sample BAM file" min="1"> | |
| 184 <param name="input_bam" type="data" format="bam" label="BAM file" /> | |
| 185 </repeat> | |
| 186 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
| 187 </when> | |
| 188 </conditional> | |
| 189 | |
| 190 <conditional name="options_type"> <!-- Basic/Advanced switch: "basic" adds no parameters; "advanced" exposes the optional FreeBayes settings that the command template reads under options_type --> | |
| 191 <param name="options_type_selector" type="select" label="Basic or Advanced options"> | |
| 192 <option value="basic" selected="True">Basic</option> | |
| 193 <option value="advanced">Advanced</option> | |
| 194 </param> | |
| 195 <when value="basic"> | |
| 196 <!-- Do nothing here --> | |
| 197 </when> | |
| 198 <when value="advanced"> | |
| 199 | |
| 200 <!-- output --> | |
| 201 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" /> | |
| 202 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" /> | |
| 203 | |
| 204 | |
| 205 <!-- input --> | |
| 206 <conditional name="target_limit_type"> | |
| 207 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets"> | |
| 208 <option value="do_not_limit" selected="True">Do not limit</option> | |
| 209 <option value="limit_by_target_file">Limit by target file</option> | |
| 210 <option value="limit_by_region">Limit to region</option> | |
| 211 </param> | |
| 212 <when value="do_not_limit"> | |
| 213 <!-- Do nothing here --> | |
| 214 </when> | |
| 215 <when value="limit_by_target_file"> | |
| 216 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." /> | |
| 217 </when> | |
| 218 <when value="limit_by_region"> | |
| 219 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? --> | |
| 220 <param name="region_start" type="integer" label="Region Start" value="" /> | |
| 221 <param name="region_end" type="integer" label="Region End" value="" /> | |
| 222 </when> | |
| 223 </conditional> | |
| 224 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" /> | |
| 225 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" /> | |
| 226 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" /> | |
| 227 <conditional name="input_variant_type"> | |
| 228 <param name="input_variant_type_selector" type="select" label="Provide variants file"> | |
| 229 <option value="do_not_provide" selected="True">Do not provide</option> | |
| 230 <option value="provide_vcf">Provide VCF file</option> | |
| 231 </param> | |
| 232 <when value="do_not_provide"> | |
| 233 <!-- Do nothing here --> | |
| 234 </when> | |
| 235 <when value="provide_vcf"> | |
| 236 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" /> | |
| 237 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" /> | |
| 238 </when> | |
| 239 </conditional> | |
| 240 <param name="haplotype_basis_alleles" type="data" format="vcf" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" optional="True" /> | |
| 241 | |
| 242 <!-- reporting --> | |
| 243 <conditional name="section_reporting_type"> | |
| 244 <param name="section_reporting_type_selector" type="select" label="Set Reporting options"> | |
| 245 <option value="do_not_set" selected="True">Do not set</option> | |
| 246 <option value="set">Set</option> | |
| 247 </param> | |
| 248 <when value="do_not_set"> | |
| 249 <!-- do nothing here --> | |
| 250 </when> | |
| 251 <when value="set"> | |
| 252 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater than this value" value="0.0001" /> | |
| 253 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" /> | |
| 254 </when> | |
| 255 </conditional> | |
| 256 | |
| 257 | |
| 258 <!-- population model --> | |
| 259 <conditional name="section_population_model_type"> | |
| 260 <param name="section_population_model_type_selector" type="select" label="Set population model options"> | |
| 261 <option value="do_not_set" selected="True">Do not set</option> | |
| 262 <option value="set">Set</option> | |
| 263 </param> | |
| 264 <when value="do_not_set"> | |
| 265 <!-- do nothing here --> | |
| 266 </when> | |
| 267 <when value="set"> | |
| 268 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/> | |
| 269 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" /> | |
| 270 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." /> | |
| 271 </when> | |
| 272 </conditional> | |
| 273 | |
| 274 <!-- reference allele --> | |
| 275 <conditional name="use_reference_allele_type"> | |
| 276 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis"> | |
| 277 <option value="do_not_include_reference_allele" selected="True">Do not include</option> | |
| 278 <option value="include_reference_allele">Include</option> | |
| 279 </param> | |
| 280 <when value="do_not_include_reference_allele"> | |
| 281 <!-- Do nothing here --> | |
| 282 </when> | |
| 283 <when value="include_reference_allele"> | |
| 284 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" /> | |
| 285 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" /> | |
| 286 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" /> | |
| 287 </when> | |
| 288 </conditional> | |
| 289 | |
| 290 <!-- allele scope --> | |
| 291 <conditional name="section_allele_scope_type"> | |
| 292 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options"> | |
| 293 <option value="do_not_set" selected="True">Do not set</option> | |
| 294 <option value="set">Set</option> | |
| 295 </param> | |
| 296 <when value="do_not_set"> | |
| 297 <!-- do nothing here --> | |
| 298 </when> | |
| 299 <when value="set"> | |
| 300 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" /> | |
| 301 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" /> | |
| 302 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" /> | |
| 303 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" /> | |
| 304 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" /> | |
| 305 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/> | |
| 306 </when> | |
| 307 </conditional> | |
| 308 | |
| 309 <!-- indel realignment --> | |
| 310 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" /> | |
| 311 | |
| 312 <!-- input filters --> | |
| 313 <conditional name="section_input_filters_type"> | |
| 314 <param name="section_input_filters_type_selector" type="select" label="Set input filters options"> | |
| 315 <option value="do_not_set" selected="True">Do not set</option> | |
| 316 <option value="set">Set</option> | |
| 317 </param> | |
| 318 <when value="do_not_set"> | |
| 319 <!-- do nothing here --> | |
| 320 </when> | |
| 321 <when value="set"> | |
| 322 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" /> | |
| 323 <conditional name="quality_filter_type"> | |
| 324 <param name="quality_filter_type_selector" type="select" label="Apply Quality filters"> | |
| 325 <option value="standard_filters" selected="True">Apply standard</option> | |
| 326 <option value="apply_filters">Apply specified</option> | |
| 327 </param> | |
| 328 <when value="standard_filters"> | |
| 329 <!-- Do nothing here --> <!-- standard-filters --> | |
| 330 </when> | |
| 331 <when value="apply_filters"> | |
| 332 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="0" /> | |
| 333 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="0" /> | |
| 334 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" /> | |
| 335 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" /> | |
| 336 </when> | |
| 337 </conditional> | |
| 338 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is >=" value="10" /> | |
| 339 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="" optional="True" /> | |
| 340 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="1.0" /> | |
| 341 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold" value="" optional="True" /> | |
| 342 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" /> | |
| 343 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" /> | |
| 344 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> | |
| 345 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" /> | |
| 346 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" /> | |
| 347 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" /> | |
| 348 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" /> | |
| 349 </when> | |
| 350 </conditional> | |
| 351 | |
| 352 | |
| 353 <!-- bayesian priors --> | |
| 354 <conditional name="section_bayesian_priors_type"> | |
| 355 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options"> | |
| 356 <option value="do_not_set" selected="True">Do not set</option> | |
| 357 <option value="set">Set</option> | |
| 358 </param> | |
| 359 <when value="do_not_set"> | |
| 360 <!-- do nothing here --> | |
| 361 </when> | |
| 362 <when value="set"> | |
| 363 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" /> | |
| 364 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" /> | |
| 365 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" /> | |
| 366 </when> | |
| 367 </conditional> | |
| 368 | |
| 369 <!-- observation prior expectations --> | |
| 370 <conditional name="section_observation_prior_expectations_type"> | |
| 371 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options"> | |
| 372 <option value="do_not_set" selected="True">Do not set</option> | |
| 373 <option value="set">Set</option> | |
| 374 </param> | |
| 375 <when value="do_not_set"> | |
| 376 <!-- do nothing here --> | |
| 377 </when> | |
| 378 <when value="set"> | |
| 379 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" /> | |
| 380 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" /> | |
| 381 </when> | |
| 382 </conditional> | |
| 383 | |
| 384 | |
| 385 <!-- algorithmic features --> | |
| 386 <conditional name="section_algorithmic_features_type"> | |
| 387 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options"> | |
| 388 <option value="do_not_set" selected="True">Do not set</option> | |
| 389 <option value="set">Set</option> | |
| 390 </param> | |
| 391 <when value="do_not_set"> | |
| 392 <!-- do nothing here --> | |
| 393 </when> | |
| 394 <when value="set"> | |
| 395 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" /> | |
| 396 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" /> | |
| 397 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" /> | |
| 398 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" /> | |
| 399 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" /> | |
| 400 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" /> | |
| 401 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" /> | |
| 402 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" /> | |
| 403 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" /> | |
| 404 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" /> | |
| 405 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" /> | |
| 406 </when> | |
| 407 </conditional> | |
| 408 | |
| 409 | |
| 410 </when> | |
| 411 </conditional> | |
| 412 | |
| 413 </inputs> | |
| 414 <outputs> <!-- The VCF is always produced; the failed-alleles BED and trace outputs exist only when advanced options are selected and the matching checkbox is ticked (see each filter) --> | |
| 415 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> | |
| 416 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)"> | |
| 417 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter> | |
| 418 </data> | |
| 419 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)"> | |
| 420 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter> | |
| 421 </data> | |
| 422 </outputs> | |
| 423 <tests> <!-- Single functional test: history-supplied phiX.fasta reference, one BAM, basic options; the VCF output is checked with compare="contains" against freebayes_out_1.vcf.contains --> | |
| 424 <test> | |
| 425 <param name="reference_source_selector" value="history" /> | |
| 426 <param name="ref_file" ftype="fasta" value="phiX.fasta"/> | |
| 427 <param name="input_bam" ftype="bam" value="fake_phiX_reads_1.bam"/> | |
| 428 <param name="options_type_selector" value="basic"/> | |
| 429 <output name="output_vcf" file="freebayes_out_1.vcf.contains" compare="contains"/> | |
| 430 </test> | |
| 431 </tests> | |
| 432 <help> | |
| 433 **What it does** | |
| 434 | |
| 435 This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file. | |
| 436 | |
| 437 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners. | |
| 438 | |
| 439 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development. | |
| 440 | |
| 441 Go `here <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_ for details on FreeBayes. | |
| 442 | |
| 443 ------ | |
| 444 | |
| 445 **Inputs** | |
| 446 | |
| 447 FreeBayes accepts an input aligned BAM file. | |
| 448 | |
| 449 | |
| 450 **Outputs** | |
| 451 | |
| 452 The output is in the VCF format. | |
| 453 | |
| 454 ------- | |
| 455 | |
| 456 **Settings**:: | |
| 457 | |
| 458 input and output: | |
| 459 | |
| 460 -b --bam FILE Add FILE to the set of BAM files to be analyzed. | |
| 461 -c --stdin Read BAM input on stdin. | |
| 462 -v --vcf FILE Output VCF-format results to FILE. | |
| 463 -f --fasta-reference FILE | |
| 464 Use FILE as the reference sequence for analysis. | |
| 465 An index file (FILE.fai) will be created if none exists. | |
| 466 If neither --targets nor --region are specified, FreeBayes | |
| 467 will analyze every position in this reference. | |
| 468 -t --targets FILE | |
| 469 Limit analysis to targets listed in the BED-format FILE. | |
| 470 -r --region <chrom>:<start_position>..<end_position> | |
| 471 Limit analysis to the specified region, 0-base coordinates, | |
| 472 end_position not included (same as BED format). | |
| 473 -s --samples FILE | |
| 474 Limit analysis to samples listed (one per line) in the FILE. | |
| 475 By default FreeBayes will analyze all samples in its input | |
| 476 BAM files. | |
| 477 --populations FILE | |
| 478 Each line of FILE should list a sample and a population which | |
| 479 it is part of. The population-based bayesian inference model | |
| 480 will then be partitioned on the basis of the populations. | |
| 481 -A --cnv-map FILE | |
| 482 Read a copy number map from the BED file FILE, which has | |
| 483 the format: | |
| 484 reference sequence, start, end, sample name, copy number | |
| 485 ... for each region in each sample which does not have the | |
| 486 default copy number as set by --ploidy. | |
| 487 -L --trace FILE Output an algorithmic trace to FILE. | |
| 488 --failed-alleles FILE | |
| 489 Write a BED file of the analyzed positions which do not | |
| 490 pass --pvar to FILE. | |
| 491 -@ --variant-input VCF | |
| 492 Use variants reported in VCF file as input to the algorithm. | |
| 493 A report will be generated for every record in the VCF file. | |
| 494 -l --only-use-input-alleles | |
| 495 Only provide variant calls and genotype likelihoods for sites | |
| 496 and alleles which are provided in the VCF input, and provide | |
| 497 output in the VCF for all input alleles, not just those which | |
| 498 have support in the data. | |
| 499 --haplotype-basis-alleles VCF | |
| 500 When specified, only variant alleles provided in this input | |
| 501 VCF will be used for the construction of complex or haplotype | |
| 502 alleles. | |
| 503 | |
| 504 reporting: | |
| 505 | |
| 506 -P --pvar N Report sites if the probability that there is a polymorphism | |
| 507 at the site is greater than N. default: 0.0001 | |
| 508 -_ --show-reference-repeats | |
| 509 Calculate and show information about reference repeats in | |
| 510 the VCF output. | |
| 511 | |
| 512 population model: | |
| 513 | |
| 514 -T --theta N The expected mutation rate or pairwise nucleotide diversity | |
| 515 among the population under analysis. This serves as the | |
| 516 single parameter to the Ewens Sampling Formula prior model | |
| 517 default: 0.001 | |
| 518 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2 | |
| 519 -J --pooled Assume that samples result from pooled sequencing. | |
| 520 When using this flag, set --ploidy to the number of | |
| 521 alleles in each sample. | |
| 522 | |
| 523 reference allele: | |
| 524 | |
| 525 -Z --use-reference-allele | |
| 526 This flag includes the reference allele in the analysis as | |
| 527 if it is another sample from the same population. | |
| 528 -H --diploid-reference | |
| 529 If using the reference sequence as a sample (-Z), | |
| 530 treat it as diploid. default: false (reference is haploid) | |
| 531 --reference-quality MQ,BQ | |
| 532 Assign mapping quality of MQ to the reference allele at each | |
| 533 site and base quality of BQ. default: 100,60 | |
| 534 | |
| 535 allele scope: | |
| 536 | |
| 537 -I --no-snps Ignore SNP alleles. | |
| 538 -i --no-indels Ignore insertion and deletion alleles. | |
| 539 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs. | |
| 540 -u --no-complex Ignore complex events (composites of other classes). | |
| 541 -n --use-best-n-alleles N | |
| 542 Evaluate only the best N SNP alleles, ranked by sum of | |
| 543 supporting quality scores. (Set to 0 to use all; default: all) | |
| 544 -E --max-complex-gap N | |
| 545 Allow complex alleles with contiguous embedded matches of up | |
| 546 to this length. | |
| 547 | |
| 548 indel realignment: | |
| 549 | |
| 550 -O --left-align-indels | |
| 551 Left-realign and merge gaps embedded in reads. default: false | |
| 552 | |
| 553 input filters: | |
| 554 | |
| 555 -4 --use-duplicate-reads | |
| 556 Include duplicate-marked alignments in the analysis. | |
| 557 default: exclude duplicates | |
| 558 -m --min-mapping-quality Q | |
| 559 Exclude alignments from analysis if they have a mapping | |
| 560 quality less than Q. default: 30 | |
| 561 -q --min-base-quality Q | |
| 562 Exclude alleles from analysis if their supporting base | |
| 563 quality is less than Q. default: 20 | |
| 564 -R --min-supporting-quality MQ,BQ | |
| 565 In order to consider an alternate allele, at least one supporting | |
| 566 alignment must have mapping quality MQ, and one supporting | |
| 567 allele must have base quality BQ. default: 0,0, unset | |
| 568 -Q --mismatch-base-quality-threshold Q | |
| 569 Count mismatches toward --read-mismatch-limit if the base | |
| 570 quality of the mismatch is >= Q. default: 10 | |
| 571 -U --read-mismatch-limit N | |
| 572 Exclude reads with more than N mismatches where each mismatch | |
| 573 has base quality >= mismatch-base-quality-threshold. | |
| 574 default: ~unbounded | |
| 575 -z --read-max-mismatch-fraction N | |
| 576 Exclude reads with more than N [0,1] fraction of mismatches where | |
| 577 each mismatch has base quality >= mismatch-base-quality-threshold | |
| 578 default: 1.0 | |
| 579 -$ --read-snp-limit N | |
| 580 Exclude reads with more than N base mismatches, ignoring gaps | |
| 581 with quality >= mismatch-base-quality-threshold. | |
| 582 default: ~unbounded | |
| 583 -e --read-indel-limit N | |
| 584 Exclude reads with more than N separate gaps. | |
| 585 default: ~unbounded | |
| 586 -0 --standard-filters Use stringent input base and mapping quality filters | |
| 587 Equivalent to -m 30 -q 20 -R 0 -S 0 | |
| 588 -x --indel-exclusion-window | |
| 589 Ignore portions of alignments this many bases from a | |
| 590 putative insertion or deletion allele. default: 0 | |
| 591 -F --min-alternate-fraction N | |
| 592 Require at least this fraction of observations supporting | |
| 593 an alternate allele within a single individual | |
| 594 in order to evaluate the position. default: 0.0 | |
| 595 -C --min-alternate-count N | |
| 596 Require at least this count of observations supporting | |
| 597 an alternate allele within a single individual in order | |
| 598 to evaluate the position. default: 1 | |
| 599 -3 --min-alternate-qsum N | |
| 600 Require at least this sum of quality of observations supporting | |
| 601 an alternate allele within a single individual in order | |
| 602 to evaluate the position. default: 0 | |
| 603 -G --min-alternate-total N | |
| 604 Require at least this count of observations supporting | |
| 605 an alternate allele within the total population in order | |
| 606 to use the allele in analysis. default: 1 | |
| 607 -! --min-coverage N | |
| 608 Require at least this coverage to process a site. default: 0 | |
| 609 | |
| 610 bayesian priors: | |
| 611 | |
| 612 -Y --no-ewens-priors | |
| 613 Turns off the Ewens' Sampling Formula component of the priors. | |
| 614 -k --no-population-priors | |
| 615 Equivalent to --pooled --no-ewens-priors | |
| 616 -w --hwe-priors Use the probability of the combination arising under HWE given | |
| 617 the allele frequency as estimated by observation frequency. | |
| 618 | |
| 619 observation prior expectations: | |
| 620 | |
| 621 -V --binomial-obs-priors | |
| 622 Incorporate expectations about observations into the priors. | |
| 623 Uses read placement probability, strand balance probability, | |
| 624 and read position (5'-3') probability. | |
| 625 -a --allele-balance-priors | |
| 626 Use aggregate probability of observation balance between alleles | |
| 627 as a component of the priors. Best for observations with minimal | |
| 628 inherent reference bias. | |
| 629 | |
| 630 algorithmic features: | |
| 631 | |
| 632 -M --site-selection-max-iterations N | |
| 633 Uses hill-climbing algorithm to search posterior space for N | |
| 634 iterations to determine if the site should be evaluated. Set to 0 | |
| 635 to prevent use of this algorithm for site selection, and | |
| 636 to a low integer for improved site selection at a slight | |
| 637 performance penalty. default: 5. | |
| 638 -B --genotyping-max-iterations N | |
| 639 Iterate no more than N times during genotyping step. default: 25. | |
| 640 --genotyping-max-banddepth N | |
| 641 Integrate no deeper than the Nth best genotype by likelihood when | |
| 642 genotyping. default: 6. | |
| 643 -W --posterior-integration-limits N,M | |
| 644 Integrate all genotype combinations in our posterior space | |
| 645 which include no more than N samples with their Mth best | |
| 646 data likelihood. default: 1,3. | |
| 647 -K --no-permute | |
| 648 Do not scale prior probability of genotype combination given allele | |
| 649 frequency by the number of permutations of included genotypes. | |
| 650 -N --exclude-unobserved-genotypes | |
| 651 Skip sample genotypings for which the sample has no supporting reads. | |
| 652 -S --genotype-variant-threshold N | |
| 653 Limit posterior integration to samples where the second-best | |
| 654 genotype likelihood is no more than log(N) from the highest | |
| 655 genotype likelihood for the sample. default: ~unbounded | |
| 656 -j --use-mapping-quality | |
| 657 Use mapping quality of alleles when calculating data likelihoods. | |
| 658 -D --read-dependence-factor N | |
| 659 Incorporate non-independence of reads by scaling successive | |
| 660 observations by this factor during data likelihood | |
| 661 calculations. default: 0.9 | |
| 662 -= --no-marginals | |
| 663 Do not calculate the marginal probability of genotypes. Saves | |
| 664 time and improves scaling performance in large populations. | |
| 665 | |
| 666 | |
| 667 ------ | |
| 668 | |
| 669 **Citation** | |
| 670 | |
| 671 For the underlying tool, please cite `Erik Garrison and Gabor Marth. Haplotype-based variant detection from short-read sequencing <http://arxiv.org/abs/1207.3907>`_. | |
| 672 | |
| 673 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | |
| 674 | |
| 675 </help> | |
| 676 </tool> |
