view samtools_mpileup.xml @ 6:483949f5c3b0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_mpileup commit 411130b45dc30f7f24f41cdeec5e148c5d8faf40
author iuc
date Tue, 09 May 2017 11:15:58 -0400
parents 5872c9894a37
children edbe9587b15d
line wrap: on
line source

<tool id="samtools_mpileup" name="MPileup" version="2.1.3">
    <description>multi-way pileup of variants</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command><![CDATA[
    ## Link every BAM and its index under predictable local names so that
    ## samtools finds each .bai right next to the corresponding .bam.
    #for $bam_count, $input_bam in enumerate( $reference_source.input_bam ):
        ln -s '${input_bam}' 'localbam_${bam_count}.bam' &&
        ln -s '${input_bam.metadata.bam_index}' 'localbam_${bam_count}.bam.bai' &&
    #end for

    ## A reference taken from the history is linked into the working directory
    ## and indexed there, so the .fai is created beside the link and not beside
    ## the original dataset file.
    #if $reference_source.reference_source_selector == "history":
        ln -s '${reference_source.ref_file}' reference.fa &&
        samtools faidx reference.fa &&
    #end if

    samtools mpileup
    #if $reference_source.reference_source_selector != "history":
        -f '${reference_source.ref_file.fields.path}'
    #else:
        ## Must be the locally indexed link created above.
        -f reference.fa
    #end if
    #for $bam_count, $input_bam in enumerate( $reference_source.input_bam ):
        localbam_${bam_count}.bam
    #end for
    #if str( $advanced_options.advanced_options_selector ) == "advanced":
        #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
            ## Each checkbox carries one flag bit; the selected bits are summed into a single integer mask.
            #if $advanced_options.filter_by_flags.require_flags:
                --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])}
            #end if
            #if $advanced_options.filter_by_flags.exclude_flags:
                --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])}
            #end if
        #end if
        ## Pasted values are passed through the config files of the same name.
        #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
            -l '$pasted_regions'
        #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history":
            -l '$advanced_options.limit_by_region.bed_regions'
        #end if
        #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
            -G '$excluded_read_groups'
        #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history":
            -G '$advanced_options.exclude_read_group.read_groups'
        #end if
        ## Boolean parameters expand to their flag, or to an empty string when unchecked.
        ${advanced_options.ignore_overlaps}
        ${advanced_options.skip_anomalous_read_pairs}
        ${advanced_options.disable_probabilistic_realignment}
        -C ${advanced_options.coefficient_for_downgrading}
        -d ${advanced_options.max_reads_per_bam}
        ${advanced_options.extended_BAQ_computation}
        -q ${advanced_options.minimum_mapping_quality}
        -Q ${advanced_options.minimum_base_quality}
        #if str( $advanced_options.region_string ):
            -r '${advanced_options.region_string}'
        #end if
    #end if
    #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
        ${genotype_likelihood_computation_type.output_format}
        ${genotype_likelihood_computation_type.compressed}

        #if str( $genotype_likelihood_computation_type.output_tags ) != "None":
            --output-tags '${genotype_likelihood_computation_type.output_tags}'
        #end if

        ## The remaining selector value (perform_indel_calling_def) adds no options at all.
        #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
            --open-prob ${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}
            -e ${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}
            -h ${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}
            -L ${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}
            -m ${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}
            -F ${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}
            ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample}
            #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ):
                -P '${ ",".join( str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ) }'
            #end if
        #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling':
            -I
        #end if
    #else:
        ## Plain pileup output: only the two extra-column switches apply.
        ${genotype_likelihood_computation_type.base_position_on_reads}
        ${genotype_likelihood_computation_type.output_mapping_quality}
    #end if
    --output '$output_mpileup'
    ]]></command>

    <configfiles>
        <!-- Read groups pasted by the user, written one ID per line. The file stays empty unless the advanced "paste" choice is active. -->
        <configfile name="excluded_read_groups"><![CDATA[
#set pasted_data = ''
#if str( $advanced_options.advanced_options_selector ) == "advanced":
    #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
        ## The pasted text is stored in "group_paste"; "read_groups" exists only in the "history" branch.
        #set pasted_data = '\n'.join( str( $advanced_options.exclude_read_group['group_paste'] ).split() )
    #end if
#end if
${pasted_data}
        ]]></configfile>
        <!-- Regions pasted by the user. All whitespace-separated tokens are re-joined with tabs onto a single line. -->
        <!-- NOTE(review): a multi-line paste would be collapsed into one line here; confirm this is the intended format for the positions file. -->
        <configfile name="pasted_regions"><![CDATA[
#set pasted_data = ''
#if str( $advanced_options.advanced_options_selector ) == "advanced":
    #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
        #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() )
    #end if
#end if
${pasted_data}
        ]]></configfile>
    </configfiles>

    <inputs>
        <!-- Reference genome selection. Both branches expose the same parameter names (input_bam, ref_file) so the command template can address them uniformly. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
                <option value="cached">Use a built-in genome</option>
                <option value="history">Use a genome from the history</option>
            </param>
            <when value="cached">
                <param name="input_bam" type="data" format="bam" multiple="True" min="1" label="BAM file(s)">
                    <validator type="unspecified_build" />
                    <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" />
                    <!-- The command links the BAM index in this branch too, so require it here as well. -->
                    <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
                </param>
                <param name="ref_file" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes" />
                </param>
            </when>
            <when value="history">
                <param name="input_bam" type="data" format="bam" multiple="True" min="1" label="BAM file(s)">
                    <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
                </param>
                <param name="ref_file" type="data" format="fasta" label="Using reference genome" />
            </when>
        </conditional>
        <!-- Output mode: genotype likelihoods (VCF/BCF, with optional INDEL tuning) or a plain pileup. -->
        <conditional name="genotype_likelihood_computation_type">
            <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
                <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option>
                <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option>
            </param>
            <when value="perform_genotype_likelihood_computation">
                <!-- The option values are the literal command-line flags; the output datatype is switched on the same values. -->
                <param name="output_format" type="select" label="Choose the output format">
                    <option value="--VCF">VCF</option>
                    <option value="--BCF">BCF</option>
                </param>
                <!-- Inverted boolean: leaving the box unchecked emits the uncompressed flag. -->
                <param name="compressed" argument="--uncompressed" type="boolean" truevalue="" falsevalue="--uncompressed" checked="False" label="Compress output" />
                <param name="output_tags" argument="--output-tags" type="select" optional="True" multiple="True" display="checkboxes" label="Optional tags to output">
                    <option value="DP">DP (Number of high-quality bases)</option>
                    <option value="DPR">DPR (Number of high-quality bases for each observed allele)</option>
                    <option value="DV">DV (Number of high-quality non-reference bases)</option>
                    <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option>
                    <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option>
                    <option value="SP">SP (Phred-scaled strand bias P-value)</option>
                </param>
                <conditional name="perform_indel_calling">
                    <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
                        <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option>
                        <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option>
                        <option value="do_not_perform_indel_calling">Do not perform INDEL calling (-I)</option>
                    </param>
                    <!-- Default INDEL calling exposes no parameters. -->
                    <when value="perform_indel_calling_def" />
                    <when value="perform_indel_calling">
                        <param name="gap_open_sequencing_error_probability" argument="--open-prob" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" help="Reducing this value leads to more indel calls" />
                        <param name="gap_extension_sequencing_error_probability" argument="--ext-prob" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" help="Reducing this value leads to longer indels" />
                        <param name="coefficient_for_modeling_homopolymer_errors" argument="--tandem-qual" type="integer" value="100" label="Coefficient for modeling homopolymer errors" />
                        <param name="skip_indel_calling_above_sample_depth" argument="--max-idepth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" />
                        <param name="minimum_gapped_reads_for_indel_candidates" argument="--min-ireads" type="integer" value="1" label="Minimum gapped reads for indel candidates" />
                        <param name="minimum_gapped_read_fraction" argument="--gap-frac" type="float" value="0.002" label="Minimum fraction of gapped reads" />
                        <param name="gapped_read_per_sample" argument="--per-sample-mF" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply --min-ireads and --gap-frac values on a per-sample basis" help="By default both options are applied to reads pooled from all samples"/>
                        <!-- Every repeat entry contributes one platform name; the command joins them with commas. -->
                        <repeat name="platform_list_repeat" title="Platform for INDEL candidates" help="--platforms">
                            <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/>
                        </repeat>
                    </when>
                    <when value="do_not_perform_indel_calling" />
                </conditional>

            </when>
            <when value="do_not_perform_genotype_likelihood_computation">
                <param name="base_position_on_reads" argument="--output-BP" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" />
                <param name="output_mapping_quality" argument="--output-MQ" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" />
            </when>
        </conditional>
        <!-- Optional read filtering, region restriction and quality thresholds; nothing in here applies while "Basic" is selected. -->
        <conditional name="advanced_options">
            <param name="advanced_options_selector" type="select" label="Set advanced options">
                <option selected="True" value="basic">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <conditional name="filter_by_flags">
                    <param name="filter_flags" type="select" label="Set filter by flags">
                        <option selected="True" value="nofilter">Do not filter</option>
                        <option value="filter">Filter by flags to exclude or require</option>
                    </param>
                    <when value="filter">
                        <!-- Option values are the individual flag bits; the command sums the selected ones into one mask. -->
                        <param name="require_flags" argument="--incl-flags" type="select" multiple="True" display="checkboxes" label="Require">
                            <option value="1">Read is paired</option>
                            <option value="2">Read is mapped in a proper pair</option>
                            <option value="4">The read is unmapped</option>
                            <option value="8">The mate is unmapped</option>
                            <option value="16">Read strand</option>
                            <option value="32">Mate strand</option>
                            <option value="64">Read is the first in a pair</option>
                            <option value="128">Read is the second in a pair</option>
                            <option value="256">The alignment of this read is not primary</option>
                            <option value="512">The read fails platform/vendor quality checks</option>
                            <option value="1024">The read is a PCR or optical duplicate</option>
                        </param>
                        <param name="exclude_flags" argument="--excl-flags" type="select" multiple="True" display="checkboxes" label="Exclude">
                            <option value="1">Read is paired</option>
                            <option value="2">Read is mapped in a proper pair</option>
                            <option value="4">The read is unmapped</option>
                            <option value="8">The mate is unmapped</option>
                            <option value="16">Read strand</option>
                            <option value="32">Mate strand</option>
                            <option value="64">Read is the first in a pair</option>
                            <option value="128">Read is the second in a pair</option>
                            <option value="256">The alignment of this read is not primary</option>
                            <option value="512">The read fails platform/vendor quality checks</option>
                            <option value="1024">The read is a PCR or optical duplicate</option>
                        </param>
                    </when>
                    <when value="nofilter" />
                </conditional>
                <!-- Regions come either from a BED dataset or from pasted text. -->
                <conditional name="limit_by_region">
                    <param name="limit_by_regions" argument="--positions" type="select" label="Select regions to call">
                        <option selected="True" value="no_limit">Do not limit</option>
                        <option value="history">From a BED file</option>
                        <option value="paste">Paste a list of regions or BED</option>
                    </param>
                    <when value="history">
                        <param name="bed_regions" type="data" format="bed" label="BED file">
                            <validator type="dataset_ok_validator" />
                        </param>
                    </when>
                    <when value="paste">
                        <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions" />
                    </when>
                    <when value="no_limit" />
                </conditional>
                <!-- Read groups to exclude come either from a text dataset or from pasted text. -->
                <conditional name="exclude_read_group">
                    <param name="exclude_read_groups" argument="--exclude-RG" type="select" label="Select read groups to exclude">
                        <option selected="True" value="no_limit">Do not exclude</option>
                        <option value="history">From a text file</option>
                        <option value="paste">Paste a list of read groups</option>
                    </param>
                    <when value="history">
                        <param name="read_groups" type="data" format="txt" label="Text file">
                            <validator type="dataset_ok_validator" />
                        </param>
                    </when>
                    <when value="paste">
                        <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups" />
                    </when>
                    <when value="no_limit" />
                </conditional>
                <!-- Boolean switches carry their command-line flag as truevalue and an empty string otherwise. -->
                <param name="ignore_overlaps" argument="--ignore-overlaps" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Disable read-pair overlap detection" />
                <param name="skip_anomalous_read_pairs" argument="--count-orphans" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" />
                <param name="disable_probabilistic_realignment" argument="--no-BAQ" type="boolean" truevalue="-B" falsevalue="" checked="False" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" help="BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments" />
                <param name="coefficient_for_downgrading" argument="--adjust-MQ" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" help="Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50" />
                <param name="max_reads_per_bam" argument="--max-depth" type="integer" max="1024" min="1" value="250" label="Max reads per BAM" />
                <param name="extended_BAQ_computation" argument="--redo-BAQ" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Redo BAQ computation" help="Ignore existing BQ tags" />
                <param name="minimum_mapping_quality" argument="--min-MQ" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
                <param name="minimum_base_quality" argument="--min-BQ" type="integer" value="13" label="Minimum base quality for a base to be considered" />
                <param name="region_string" argument="--region" type="text" value="" label="Only generate pileup in region" help="If used in conjunction with --positions, then considers the intersection of the two requests. Defaults to all sites" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- One result dataset. It is a pileup by default and becomes BCF or VCF when that output format is selected. -->
        <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}">
            <change_format>
                <when input="genotype_likelihood_computation_type.output_format" value="--BCF" format="bcf" />
                <when input="genotype_likelihood_computation_type.output_format" value="--VCF" format="vcf" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Plain pileup from a history reference, basic options, both extra output columns enabled. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="phiX.fasta" />
            <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" />
            <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
            <param name="advanced_options_selector" value="basic" />
            <param name="base_position_on_reads" value="true" />
            <param name="output_mapping_quality" value="true" />
            <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" />
        </test>
        <!-- VCF output with explicitly set INDEL calling parameters; lines_diff tolerates differing header lines. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="phiX.fasta" />
            <param name="input_bam" ftype="bam" value="phiX.bam" />
            <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
            <param name="gap_extension_sequencing_error_probability" value="20" />
            <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
            <param name="perform_indel_calling_selector" value="perform_indel_calling" />
            <param name="skip_indel_calling_above_sample_depth" value="250" />
            <param name="gap_open_sequencing_error_probability" value="40" />
            <param name="platform_list_repeat" value="0" />
            <param name="advanced_options_selector" value="basic" />
            <!-- Use the option value (the literal flag), not its display text. -->
            <param name="genotype_likelihood_computation_type|output_format" value="--VCF" />
            <output name="output_mpileup" file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" />
        </test>
        <!-- Plain pileup with the advanced section enabled and the base quality threshold lowered to zero. -->
        <test>
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" ftype="fasta" value="phiX.fasta" />
            <param name="input_bam" ftype="bam" value="samtools_mpileup_in_1.bam" />
            <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
            <param name="advanced_options_selector" value="advanced" />
            <param name="minimum_base_quality" value="0" /><!-- most reads have ultra low quality resulting in empty columns -->
            <param name="base_position_on_reads" value="true" />
            <param name="output_mapping_quality" value="true" />
            <output name="output_mpileup" file="samtools_mpileup_out_3.pileup" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Generate VCF, BCF or pileup output for one or multiple BAM files. Alignment records are grouped by sample (SM) identifiers in @RG header lines.
If sample identifiers are absent, each input file is regarded as one sample.

**Notes**: Assuming diploid individuals.
    ]]></help>
    <expand macro="citations" />
</tool>