Mercurial > repos > jbrayet > samtools_1_2_docker
changeset 7:2f69634a480e draft
Uploaded
author | jbrayet |
---|---|
date | Wed, 25 Nov 2015 03:22:57 -0500 |
parents | fdf2125c9f02 |
children | bb21f59022f8 |
files | samtools_mpileup.xml |
diffstat | 1 files changed, 368 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools_mpileup.xml Wed Nov 25 03:22:57 2015 -0500 @@ -0,0 +1,368 @@ +<tool id="samtools_mpileup" name="MPileup" version="2.1.1"> + <description>call variants</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> + <command> + <![CDATA[ + #if $reference_source.reference_source_selector == "history": + ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup + #else: + samtools mpileup + #end if + #if $reference_source.reference_source_selector != "history": + -f "${reference_source.ref_file.fields.path}" + #else: + -f "${reference_source.ref_file}" + #end if + #for $bam in $reference_source.input_bam: + "${bam}" + #end for + #if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": + #if $advanced_options.filter_by_flags.require_flags: + --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])} + #end if + #if $advanced_options.filter_by_flags.exclude_flags: + --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])} + #end if + #end if + #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": + -l "$pasted_regions" + #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history" + -l "$advanced_options.limit_by_region.bed_regions" + #end if + #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": + -G "$excluded_read_groups" + #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" + -G "$advanced_options.exclude_read_group.read_groups" + #end if + ${advanced_options.skip_anomalous_read_pairs} + ${advanced_options.disable_probabilistic_realignment} + -C "${advanced_options.coefficient_for_downgrading}" + -d "${advanced_options.max_reads_per_bam}" + ${advanced_options.extended_BAQ_computation} + -q "${advanced_options.minimum_mapping_quality}" + -Q "${advanced_options.minimum_base_quality}" + #if str( $advanced_options.region_string ): + -r "${advanced_options.region_string}" + #end if + + #end if + #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': + ## + + ${genotype_likelihood_computation_type.output_format} + ${genotype_likelihood_computation_type.compressed} + + #if str( $genotype_likelihood_computation_type.output_tags ) != "None": + --output-tags "${genotype_likelihood_computation_type.output_tags}" + #end if + + #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': + -o "${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}" + -e "${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}" + -h "${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}" + -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" + -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}" + --open-prob "${genotype_likelihood_computation_type.perform_indel_calling.open_seq_error_probability}" + -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}" + ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample} + #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ): + -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ] ) }" + #end if + #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling': + -I + #end if + + + #else: + ${genotype_likelihood_computation_type.base_position_on_reads} + ${genotype_likelihood_computation_type.output_mapping_quality} + #end if + --output "$output_mpileup" 2> "$output_log" + ]]> + </command> + <inputs> + <conditional name="reference_source"> + <param label="Choose the source for the reference genome" name="reference_source_selector" type="select"> + <option value="cached">Use a built-in genome</option> + <option value="history">Use a genome from the history</option> + </param> + <when value="cached"> + <param format="bam" label="BAM file(s)" name="input_bam" type="data" min="1" multiple="True"> + <validator type="unspecified_build" /> + <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" /> + </param> + <param label="Using reference genome" name="ref_file" type="select"> + <options from_data_table="fasta_indexes" /> + </param> + </when> + <when value="history"> + <param format="bam" label="BAM file(s)" name="input_bam" type="data" min="1" multiple="True"> + <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" /> + </param> + <param format="fasta" label="Using reference genome" name="ref_file" type="data" /> + </when> + </conditional> + <conditional name="genotype_likelihood_computation_type"> + <param label="Genotype Likelihood Computation" name="genotype_likelihood_computation_type_selector" type="select"> + <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option> + <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option> + </param> + <when value="perform_genotype_likelihood_computation"> + <param label="Choose the output format" name="output_format" type="select"> + <option value="--VCF">VCF</option> + <option value="--BCF">BCF</option> + </param> + <param checked="False" falsevalue="--uncompressed" label="Compress output" name="compressed" truevalue="" type="boolean" help="--incompressed; default=False"/> + <param name="output_tags" optional="True" type="select" multiple="True" display="checkboxes" label="Optional tags to output" help="--output-tags"> + <option value="DP">DP (Number of high-quality bases)</option> + <option value="DPR">DRP (Number of high-quality bases for each observed allele)</option> + <option value="DV">DV (Number of high-quality non-reference bases)</option> + <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option> + <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option> + <option value="SP">SP (Phred-scaled strand bias P-value)</option> + </param> + <conditional name="perform_indel_calling"> + <param label="Perform INDEL calling" name="perform_indel_calling_selector" type="select"> + <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option> + <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option> + <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> + </param> + <when value="perform_indel_calling_def" /> + <when value="perform_indel_calling"> + <param label="Phred-scaled gap open sequencing error probability" name="gap_open_sequencing_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/> + <param label="Phred-scaled gap extension sequencing error probability" name="gap_extension_sequencing_error_probability" type="integer" value="20" help="--ext-prob; Reducing this value leads to longer indels. default=20"/> + <param label="Coefficient for modeling homopolymer errors." name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" help="--tandem-qual; default=100"/> + <param label="Skip INDEL calling if the average per-sample depth is above" name="skip_indel_calling_above_sample_depth" type="integer" value="250" help="--max-idepth; default=250"/> + <param label="Minimum gapped reads for indel candidates" name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" help="--min-ireads; default=1"/> + <param label="Phred-scaled gap open sequencing error probability" name="open_seq_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/> + <param label="Minimum fraction of gapped reads" name="minimum_gapped_read_fraction" type="float" value="0.002" help="--gap-frac; default=0.002"/> + <param checked="False" falsevalue="" label="Apply --min-ireads and --gap-frac values on a per-sample basis" name="gapped_read_per_sample" truevalue="-p" type="boolean" help="--per-sample-mF; by default both options are applied to reads pooled from all samples"/> + <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> + <param label="Platform to use for INDEL candidates" name="platform_entry" type="text" value="" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/> + </repeat> + </when> + <when value="do_not_perform_indel_calling" /> + </conditional> + + </when> + <when value="do_not_perform_genotype_likelihood_computation"> + <param checked="False" falsevalue="" label="Output base positions on reads" name="base_position_on_reads" truevalue="-O" type="boolean" help="--output-BP"/> + <param checked="False" falsevalue="" label="Output mapping quality" name="output_mapping_quality" truevalue="-s" type="boolean" help="--output-MQ"/> + </when> + </conditional> + <conditional name="advanced_options"> + <param label="Set advanced options" name="advanced_options_selector" type="select"> + <option selected="True" value="basic">Basic</option> + <option value="advanced">Advanced</option> + </param> + <when value="advanced"> + <conditional name="filter_by_flags"> + <param label="Set filter by flags" name="filter_flags" type="select"> + <option selected="True" value="nofilter">Do not filter</option> + <option value="filter">Filter by flags to exclude or require</option> + </param> + <when value="filter"> + <param display="checkboxes" label="Require" multiple="True" name="require_flags" type="select" help="--incl-flags"> + <option value="1">Read is paired</option> + <option value="2">Read is mapped in a proper pair</option> + <option value="4">The read is unmapped</option> + <option value="8">The mate is unmapped</option> + <option value="16">Read strand</option> + <option value="32">Mate strand</option> + <option value="64">Read is the first in a pair</option> + <option value="128">Read is the second in a pair</option> + <option value="256">The alignment or this read is not primary</option> + <option value="512">The read fails platform/vendor quality checks</option> + <option value="1024">The read is a PCR or optical duplicate</option> + </param> + <param display="checkboxes" label="Exclude" multiple="True" name="exclude_flags" type="select" help="--excl-flags"> + <option value="1">Read is paired</option> + <option value="2">Read is mapped in a proper pair</option> + <option value="4">The read is unmapped</option> + <option value="8">The mate is unmapped</option> + <option value="16">Read strand</option> + <option value="32">Mate strand</option> + <option value="64">Read is the first in a pair</option> + <option value="128">Read is the second in a pair</option> + <option value="256">The alignment or this read is not primary</option> + <option value="512">The read fails platform/vendor quality checks</option> + <option value="1024">The read is a PCR or optical duplicate</option> + </param> + </when> + <when value="nofilter" /> + </conditional> + <conditional name="limit_by_region"> + <param label="Select regions to call" name="limit_by_regions" type="select"> + <option selected="True" value="no_limit">Do not limit</option> + <option value="history">From an uploaded BED file (--positions)</option> + <option value="paste">Paste a list of regions or BED (--region)</option> + </param> + <when value="history"> + <param format="bed" label="BED file" name="bed_regions" type="data" help="--positions"> + <validator type="dataset_ok_validator" /> + </param> + </when> + <when value="paste"> + <param area="true" help="Paste a list of regions in BED format or as a list of chromosomes and positions" label="Regions" name="region_paste" size="10x35" type="text"/> + </when> + <when value="no_limit" /> + </conditional> + <conditional name="exclude_read_group"> + <param label="Select read groups to exclude" name="exclude_read_groups" type="select" help="--exclude-RG"> + <option selected="True" value="no_limit">Do not exclude</option> + <option value="history">From an uploaded text file</option> + <option value="paste">Paste a list of read groups</option> + </param> + <when value="history"> + <param format="txt" label="Text file" name="read_groups" type="data"> + <validator type="dataset_ok_validator" /> + </param> + </when> + <when value="paste"> + <param area="true" help="Paste a list of read groups" label="Read groups" name="group_paste" size="10x35" type="text" /> + </when> + <when value="no_limit" /> + </conditional> + <param checked="False" falsevalue="" label="Disable read-pair overlap detection" name="ignore_overlaps" truevalue="-x" type="boolean" help="--ignore-overlaps"/> + <param checked="False" falsevalue="" label="Do not skip anomalous read pairs in variant calling" name="skip_anomalous_read_pairs" truevalue="-A" type="boolean" help="--count-orphans"/> + <param checked="False" falsevalue="" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" name="disable_probabilistic_realignment" truevalue="-B" type="boolean" help="--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments"/> + <param label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" name="coefficient_for_downgrading" type="integer" value="0" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/> + <param label="Max reads per BAM" max="1024" min="1" name="max_reads_per_bam" type="integer" value="250" help="--max-depth; default=250"/> + <param checked="False" falsevalue="" label="Redo BAQ computation" name="extended_BAQ_computation" truevalue="-E" type="boolean" help="--redo-BAQ; ignore existing BQ tags"/> + <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="-min-MQ; default=0"/> + <param label="Minimum base quality for a base to be considered" name="minimum_base_quality" type="integer" value="13" help="--min-BQ; default=13"/> + <param label="Only generate pileup in region" name="region_string" type="text" value="" help="--region; If used in conjunction with --positions, then considers the intersection of the two requests. Defaults to all sites" /> + </when> + <when value="basic" /> + </conditional> + </inputs> + <outputs> + <data format="pileup" label="${tool.name} on ${on_string}" name="output_mpileup"> + <change_format> + <when format="bcf" input="genotype_likelihood_computation_type.output_format" value="--BCF" /> + <when format="vcf" input="genotype_likelihood_computation_type.output_format" value="--VCF" /> + </change_format> + </data> + <data format="txt" label="${tool.name} on ${on_string} (log)" name="output_log" /> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param ftype="fasta" name="ref_file" value="phiX.fasta" /> + <param ftype="bam" name="input_bam" value="samtools_mpileup_in_1.bam" /> + <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> + <param name="advanced_options_selector" value="basic" /> + <param name="base_position_on_reads" value="true" /> + <param name="output_mapping_quality" value="true" /> + <output file="samtools_mpileup_out_1.pileup" name="output_mpileup" /> + <output file="samtools_mpileup_out_1.log" name="output_log" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param ftype="fasta" name="ref_file" value="phiX.fasta" /> + <param ftype="bam" name="input_bam" value="phiX.bam" /> + <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> + <param name="gap_extension_sequencing_error_probability" value="20" /> + <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> + <param name="perform_indel_calling_selector" value="perform_indel_calling" /> + <param name="skip_indel_calling_above_sample_depth" value="250" /> + <param name="gap_open_sequencing_error_probability" value="40" /> + <param name="platform_list_repeat" value="0" /> + <param name="advanced_options_selector" value="basic" /> + <param name="genotype_likelihood_computation_type|output_format" value="VCF" /> + <output file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" name="output_mpileup" /> + <output file="samtools_mpileup_out_2.log" name="output_log" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +Report variants for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. + +------ + +**Input options**:: + + -6, --illumina1.3+ quality is in the Illumina-1.3+ encoding + -A, --count-orphans do not discard anomalous read pairs + -b, --bam-list FILE list of input BAM filenames, one per line + -B, --no-BAQ disable BAQ (per-Base Alignment Quality) + -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0] + -d, --max-depth INT max per-BAM depth; avoids excessive memory usage [250] + -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs + -f, --fasta-ref FILE faidx indexed reference sequence file + -G, --exclude-RG FILE exclude read groups listed in FILE + -l, --positions FILE skip unlisted positions (chr pos) or regions (BED) + -q, --min-MQ INT skip alignments with mapQ smaller than INT [0] + -Q, --min-BQ INT skip bases with baseQ/BAQ smaller than INT [13] + -r, --region REG region in which pileup is generated + -R, --ignore-RG ignore RG tags (one BAM = one sample) + --rf, --incl-flags STR|INT required flags: skip reads with mask bits unset [] + --ff, --excl-flags STR|INT filter flags: skip reads with mask bits set + [UNMAP,SECONDARY,QCFAIL,DUP] + -x, --ignore-overlaps disable read-pair overlap detection + +**Output options**:: + + -o, --output FILE write output to FILE [standard output] + -g, --BCF generate genotype likelihoods in BCF format + -v, --VCF generate genotype likelihoods in VCF format + +**Output options for mpileup format** (without -g/-v):: + + -O, --output-BP output base positions on reads + -s, --output-MQ output mapping quality + +**Output options for genotype likelihoods** (when -g/-v is used):: + + -t, --output-tags LIST optional tags to output: DP,DPR,DV,DP4,INFO/DPR,SP [] + -u, --uncompressed generate uncompressed VCF/BCF output + +**SNP/INDEL genotype likelihoods options** (effective with -g/-v):: + + -e, --ext-prob INT Phred-scaled gap extension seq error probability [20] + -F, --gap-frac FLOAT minimum fraction of gapped reads [0.002] + -h, --tandem-qual INT coefficient for homopolymer errors [100] + -I, --skip-indels do not perform indel calling + -L, --max-idepth INT maximum per-sample depth for INDEL calling [250] + -m, --min-ireads INT minimum number gapped reads for indel candidates [1] + -o, --open-prob INT Phred-scaled gap open seq error probability [40] + -p, --per-sample-mF apply -m and -F per-sample for increased sensitivity + -P, --platforms STR comma separated list of platforms for indels [all] + +**Notes**: Assuming diploid individuals. +]]> + </help> + <configfiles> + <configfile name="excluded_read_groups"> +<![CDATA[ +#set pasted_data = '' +#if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": + #set pasted_data = '\t'.join( str( $advanced_options.exclude_read_group['read_groups'] ).split() ) + #end if +#end if +${pasted_data} +]]> + </configfile> + <configfile name="pasted_regions"> +<![CDATA[ +#set pasted_data = '' +#if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": + #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() ) + #end if +#end if +${pasted_data} +]]> + </configfile> + </configfiles> + <expand macro="citations" /> +</tool>