Mercurial > repos > devteam > samtools_mpileup
view samtools_mpileup.xml @ 2:3aa48bcbc599 draft
Uploaded tarball for 0.0.3 version.
author | devteam |
---|---|
date | Wed, 12 Mar 2014 12:53:30 -0400 |
parents | b47a418ccfdc |
children | da0203c3461a |
line wrap: on
line source
<tool id="samtools_mpileup" name="MPileup" version="0.0.3">
    <!--
        Galaxy wrapper around "samtools mpileup" (samtools 0.1.19).

        Layout of this file:
          * command     : Cheetah template that assembles the mpileup command line
          * inputs      : reference source, genotype likelihood options, advanced options
          * configfiles : temporary files holding pasted regions / pasted read groups
          * outputs     : pileup (or BCF when likelihoods are computed) plus a stderr log
          * tests       : functional tests for history and cached references
          * help        : reStructuredText summary of the mpileup flags
    -->
    <description>SNP and indel caller</description>
    <requirements>
        <requirement type="package" version="0.1.19">samtools</requirement>
    </requirements>
    <command><![CDATA[
        ## For a reference taken from the history, link it into the job working
        ## directory and index the link there, so that the .fai file is written
        ## next to the link instead of next to the original dataset.
        #if $reference_source.reference_source_selector == "history":
            ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup
        #else:
            samtools mpileup
        #end if
        #if $reference_source.reference_source_selector != "history":
            -f "${reference_source.ref_file.fields.path}"
        #else:
            ## Point mpileup at the linked copy, which is the file indexed above.
            -f `basename "${reference_source.ref_file}"`
        #end if
        #for $i, $input_bam in enumerate( $reference_source.input_bams ):
            "${input_bam.input_bam}"
        #end for
        #if str( $advanced_options.advanced_options_selector ) == "advanced":
            #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
                ## The selected flag values are summed into a single bit mask.
                #if $advanced_options.filter_by_flags.require_flags:
                    --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])}
                #end if
                #if $advanced_options.filter_by_flags.exclude_flags:
                    --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])}
                #end if
            #end if
            ## Pasted values come from the configfiles below; history datasets are
            ## nested parameters and must be referenced by their full path.
            #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
                -l "$pasted_regions"
            #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history":
                -l "${advanced_options.limit_by_region.bed_regions}"
            #end if
            #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
                -G "$excluded_read_groups"
            #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history":
                -G "${advanced_options.exclude_read_group.read_groups}"
            #end if
            ${advanced_options.skip_anomalous_read_pairs}
            ${advanced_options.disable_probabilistic_realignment}
            -C "${advanced_options.coefficient_for_downgrading}"
            -d "${advanced_options.max_reads_per_bam}"
            ${advanced_options.extended_BAQ_computation}
            -q "${advanced_options.minimum_mapping_quality}"
            -Q "${advanced_options.minimum_base_quality}"
            #if str( $advanced_options.region_string ):
                -r "${advanced_options.region_string}"
            #end if
            ${advanced_options.output_per_sample_read_depth}
            ${advanced_options.output_per_sample_strand_bias_p_value}
        #end if
        #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
            ##-g or -u
            -g
            -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}"
            -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}"
            #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
                -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
                -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}"
                -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}"
                ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample}
            #else:
                -I
            #end if
            -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}"
            #if len( $genotype_likelihood_computation_type.platform_list_repeat ):
                -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }"
            #end if
        #else:
            ${genotype_likelihood_computation_type.base_position_on_reads}
            ${genotype_likelihood_computation_type.output_mapping_quality}
        #end if
        > "$output_mpileup" 2> "$output_log"
    ]]></command>
    <!-- Any non-zero exit code is reported as a fatal error. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Error" />
    </stdio>
    <inputs>
        <!-- Reference genome: a locally cached index or a FASTA dataset from the history. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <repeat name="input_bams" title="BAM file" min="1">
                    <param name="input_bam" type="data" format="bam" label="BAM file">
                        <validator type="unspecified_build" />
                        <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
                    </param>
                </repeat>
                <param name="ref_file" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes" />
                    <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...-->
                </param>
            </when>
            <when value="history">
                <repeat name="input_bams" title="BAM file" min="1">
                    <param name="input_bam" type="data" format="bam" label="BAM file">
                        <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." />
                    </param>
                </repeat>
                <param name="ref_file" type="data" format="fasta" label="Using reference file" />
            </when>
        </conditional>
        <!-- Genotype likelihoods: when computed the output is BCF, otherwise plain pileup. -->
        <conditional name="genotype_likelihood_computation_type">
            <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
                <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option>
                <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option>
            </param>
            <when value="perform_genotype_likelihood_computation">
                <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" />
                <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." />
                <conditional name="perform_indel_calling">
                    <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
                        <option value="perform_indel_calling" selected="True">Perform INDEL calling</option>
                        <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
                    </param>
                    <when value="perform_indel_calling">
                        <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" />
                        <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" />
                        <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads for candidates" />
                        <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply minimum values on a per-sample basis" />
                    </when>
                    <when value="do_not_perform_indel_calling" />
                </conditional>
                <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" />
                <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
                    <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" />
                </repeat>
            </when>
            <when value="do_not_perform_genotype_likelihood_computation">
                <param name="base_position_on_reads" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" />
                <param name="output_mapping_quality" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" />
            </when>
        </conditional>
        <!-- Advanced options: nothing from this section reaches the command line in "basic" mode. -->
        <conditional name="advanced_options">
            <param name="advanced_options_selector" type="select" label="Set advanced options">
                <option value="basic" selected="True">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="advanced">
                <!-- Option values are SAM flag bits; the command template sums the selected ones. -->
                <conditional name="filter_by_flags">
                    <param name="filter_flags" type="select" label="Set filter by flags">
                        <option value="nofilter" selected="True">Do not filter</option>
                        <option value="filter">Filter by flags to exclude or require</option>
                    </param>
                    <when value="filter">
                        <param name="require_flags" type="select" display="checkboxes" multiple="True" label="Require">
                            <option value="1">Read is paired</option>
                            <option value="2">Read is mapped in a proper pair</option>
                            <option value="4">The read is unmapped</option>
                            <option value="8">The mate is unmapped</option>
                            <option value="16">Read strand</option>
                            <option value="32">Mate strand</option>
                            <option value="64">Read is the first in a pair</option>
                            <option value="128">Read is the second in a pair</option>
                            <option value="256">The alignment or this read is not primary</option>
                            <option value="512">The read fails platform/vendor quality checks</option>
                            <option value="1024">The read is a PCR or optical duplicate</option>
                        </param>
                        <param name="exclude_flags" type="select" display="checkboxes" multiple="True" label="Exclude">
                            <option value="1">Read is paired</option>
                            <option value="2">Read is mapped in a proper pair</option>
                            <option value="4">The read is unmapped</option>
                            <option value="8">The mate is unmapped</option>
                            <option value="16">Read strand</option>
                            <option value="32">Mate strand</option>
                            <option value="64">Read is the first in a pair</option>
                            <option value="128">Read is the second in a pair</option>
                            <option value="256">The alignment or this read is not primary</option>
                            <option value="512">The read fails platform/vendor quality checks</option>
                            <option value="1024">The read is a PCR or optical duplicate</option>
                        </param>
                    </when>
                    <when value="nofilter" />
                </conditional>
                <!-- Regions: a BED dataset (bed_regions) or pasted text (region_paste, written to the pasted_regions configfile). -->
                <conditional name="limit_by_region">
                    <param name="limit_by_regions" type="select" label="Select regions to call">
                        <option value="no_limit" selected="True">Do not limit</option>
                        <option value="history">From an uploaded BED file</option>
                        <option value="paste">Paste a list of regions or BED</option>
                    </param>
                    <when value="history">
                        <param name="bed_regions" type="data" format="bed" label="BED file">
                            <validator type="dataset_ok_validator" />
                        </param>
                    </when>
                    <when value="paste">
                        <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions"/>
                    </when>
                    <when value="no_limit" />
                </conditional>
                <!-- Read groups: a text dataset (read_groups) or pasted text (group_paste, written to the excluded_read_groups configfile). -->
                <conditional name="exclude_read_group">
                    <param name="exclude_read_groups" type="select" label="Select read groups to exclude">
                        <option value="no_limit" selected="True">Do not exclude</option>
                        <option value="history">From an uploaded text file</option>
                        <option value="paste">Paste a list of read groups</option>
                    </param>
                    <when value="history">
                        <param name="read_groups" type="data" format="txt" label="Text file">
                            <validator type="dataset_ok_validator" />
                        </param>
                    </when>
                    <when value="paste">
                        <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups"/>
                    </when>
                    <when value="no_limit" />
                </conditional>
                <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" />
                <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" />
                <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" />
                <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" />
                <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" />
                <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
                <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" />
                <param name="region_string" type="text" value="" label="Only generate pileup in region" />
                <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" />
                <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" />
            </when>
            <when value="basic" />
        </conditional>
    </inputs>
    <configfiles>
        <!--
            Both configfiles split the pasted text on whitespace and join the tokens with tabs;
            they stay empty unless the matching "paste" option is selected in advanced mode.
            NOTE(review): the join puts every token on a single line; confirm that this is the
            layout samtools expects for multi-line region and read group lists.
        -->
        <!-- Pasted read groups to exclude; read from the "group_paste" parameter. -->
        <configfile name="excluded_read_groups">
<![CDATA[
<% import re %>
#set pasted_data = ''
#if str( $advanced_options.advanced_options_selector ) == "advanced":
    #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
        #set regex=re.compile("\\s+")
        #set pasted_data = '\t'.join( regex.split( str( $advanced_options.exclude_read_group['group_paste'] ) ) )
    #end if
#end if
${pasted_data}
]]>
        </configfile>
        <!-- Pasted regions to call; read from the "region_paste" parameter. -->
        <configfile name="pasted_regions">
<![CDATA[
<% import re %>
#set pasted_data = ''
#if str( $advanced_options.advanced_options_selector ) == "advanced":
    #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
        #set regex=re.compile("\\s+")
        #set pasted_data = '\t'.join( regex.split( str( $advanced_options.limit_by_region['region_paste'] ) ) )
    #end if
#end if
${pasted_data}
]]>
        </configfile>
    </configfiles>
    <outputs>
        <!-- Pileup by default; switched to BCF when genotype likelihoods are computed. -->
        <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}">
            <change_format>
                <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" />
            </change_format>
        </data>
        <!-- Receives the standard error stream of the command. -->
        <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
    </outputs>
    <tests>
        <!-- History reference, plain pileup output with base positions and mapping quality. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="phiX.fasta" ftype="fasta" />
            <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" />
            <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
            <param name="advanced_options_selector" value="basic" />
            <param name="base_position_on_reads" value="true" />
            <param name="output_mapping_quality" value="true" />
            <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" />
            <output name="output_log" file="samtools_mpileup_out_1.log" />
        </test>
        <!-- History reference, BCF output with default likelihood settings. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="phiX.fasta" ftype="fasta" />
            <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" />
            <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
            <param name="gap_extension_sequencing_error_probability" value="20" />
            <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
            <param name="perform_indel_calling_selector" value="perform_indel_calling" />
            <param name="skip_indel_calling_above_sample_depth" value="250" />
            <param name="gap_open_sequencing_error_probability" value="40" />
            <param name="platform_list_repeat" value="0" />
            <param name="advanced_options_selector" value="basic" />
            <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" />
            <output name="output_log" file="samtools_mpileup_out_2.log" />
        </test>
        <!-- Cached reference selected through the dbkey of the BAM, BCF output. -->
        <test>
            <param name="reference_source_selector" value="cached" />
            <param name="input_bam" value="samtools_mpileup_in_3.bam" ftype="bam" dbkey="phiX" />
            <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
            <param name="gap_extension_sequencing_error_probability" value="20" />
            <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
            <param name="perform_indel_calling_selector" value="perform_indel_calling" />
            <param name="skip_indel_calling_above_sample_depth" value="250" />
            <param name="gap_open_sequencing_error_probability" value="40" />
            <param name="platform_list_repeat" value="0" />
            <param name="advanced_options_selector" value="basic" />
            <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" />
            <output name="output_log" file="samtools_mpileup_out_3.log" />
        </test>
        <!-- Cached reference with advanced options; coefficient_for_downgrading is an integer parameter. -->
        <test>
            <param name="reference_source_selector" value="cached" />
            <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" dbkey="phiX" />
            <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
            <param name="gap_extension_sequencing_error_probability" value="20" />
            <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
            <param name="perform_indel_calling_selector" value="perform_indel_calling" />
            <param name="skip_indel_calling_above_sample_depth" value="250" />
            <param name="gap_open_sequencing_error_probability" value="40" />
            <param name="platform_list_repeat" value="0" />
            <param name="advanced_options_selector" value="advanced" />
            <param name="advanced_options|filter_by_flags|filter_flags" value="nofilter" />
            <param name="advanced_options|limit_by_region|limit_by_regions" value="no_limit" />
            <param name="advanced_options|coefficient_for_downgrading" value="0" />
            <param name="advanced_options|max_reads_per_bam" value="200" />
            <param name="advanced_options|extended_BAQ_computation" value="true" />
            <param name="advanced_options|minimum_mapping_quality" value="0" />
            <param name="advanced_options|minimum_base_quality" value="43" />
            <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_4.bcf" lines_diff="1" />
            <output name="output_log" file="samtools_mpileup_out_4.log" />
        </test>
    </tests>
    <help>
**What it does**

Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.

------

.. list-table:: **Input options**
   :widths: 5 5 40 10
   :header-rows: 1

   * - Flag
     - Type
     - Description
     - Default
   * - -6
     - *BOOLEAN*
     - assume the quality is in the Illumina-1.3+ encoding
     - off
   * - -A
     - *BOOLEAN*
     - count anomalous read pairs
     - off
   * - -B
     - *BOOLEAN*
     - disable BAQ computation
     - off
   * - -b
     - *FILE*
     - list of input BAM filenames, one per line
     - *null*
   * - -C
     - *INT*
     - parameter for adjusting mapQ; 0 to disable
     - 0
   * - -d
     - *INT*
     - max per-BAM depth to avoid excessive memory usage
     - 250
   * - -E
     - *BOOLEAN*
     - recalculate extended BAQ on the fly thus ignoring existing BQs
     - off
   * - -f
     - *FILE*
     - faidx indexed reference sequence file
     - *null*
   * - -G
     - *FILE*
     - exclude read groups listed in FILE
     - *null*
   * - -l
     - *FILE*
     - list of positions (chr pos) or regions (BED)
     - *null*
   * - -M
     - *INT*
     - cap mapping quality at INT
     - 60
   * - -r
     - *STR*
     - region in which pileup is generated
     - *null*
   * - -R
     - *BOOLEAN*
     - ignore RG tags
     - off
   * - -q
     - *INT*
     - skip alignments with mapQ smaller than INT
     - 0
   * - -Q
     - *INT*
     - skip bases with baseQ/BAQ smaller than INT
     - 13
   * - --rf
     - *INT*
     - required flags: skip reads with mask bits unset
     - 0
   * - --ff
     - *INT*
     - filter flags: skip reads with mask bits set
     - 0

------

.. list-table:: **Output options**
   :widths: 5 5 40 10
   :header-rows: 1

   * - Flag
     - Type
     - Description
     - Default
   * - -D
     - *BOOLEAN*
     - output per-sample DP in BCF (require -g/-u)
     - off
   * - -g
     - *BOOLEAN*
     - generate BCF output (genotype likelihoods)
     - off
   * - -O
     - *BOOLEAN*
     - output base positions on reads (disabled by -g/-u)
     - off
   * - -s
     - *BOOLEAN*
     - output mapping quality (disabled by -g/-u)
     - off
   * - -S
     - *BOOLEAN*
     - output per-sample strand bias P-value in BCF (require -g/-u)
     - off
   * - -u
     - *BOOLEAN*
     - generate uncompressed BCF output
     - off

------

.. list-table:: **SNP/INDEL genotype likelihoods options (effective with '-g' or '-u')**
   :widths: 5 5 40 10
   :header-rows: 1

   * - Flag
     - Type
     - Description
     - Default
   * - -e
     - *INT*
     - Phred-scaled gap extension seq error probability
     - 20
   * - -F
     - *FLOAT*
     - minimum fraction of gapped reads for candidates
     - 0.002
   * - -h
     - *INT*
     - coefficient for homopolymer errors
     - 100
   * - -I
     - *BOOLEAN*
     - do not perform indel calling
     - off
   * - -L
     - *INT*
     - max per-sample depth for INDEL calling
     - 250
   * - -m
     - *INT*
     - minimum gapped reads for indel candidates
     - 1
   * - -o
     - *INT*
     - Phred-scaled gap open sequencing error probability
     - 40
   * - -p
     - *BOOLEAN*
     - apply -m and -F per-sample to increase sensitivity
     - off
   * - -P
     - *STR*
     - comma separated list of platforms for indels
     - all

------

**Citation**

For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_

If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
    </help>
</tool>